From 10be98a77c558f8cfb823cd2777171fbb35040f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 May 2019 10:29:49 +0100 Subject: drm/i915: Move more GEM objects under gem/ Continuing the theme of separating out the GEM clutter. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190528092956.14910-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 26 +- drivers/gpu/drm/i915/Makefile.header-test | 2 - drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 160 ++ drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 20 + drivers/gpu/drm/i915/gem/i915_gem_context.c | 2453 +++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_context.h | 240 ++ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 208 ++ drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 318 +++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2768 +++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_internal.c | 197 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 251 ++ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 25 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 555 ++++ drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 704 +++++ drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 440 +++ drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 832 ++++++ drivers/gpu/drm/i915/gem/i915_gemfs.c | 57 + drivers/gpu/drm/i915/gem/i915_gemfs.h | 16 + .../gpu/drm/i915/gem/selftests/huge_gem_object.c | 121 + .../gpu/drm/i915/gem/selftests/huge_gem_object.h | 27 + drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 1780 +++++++++++++ .../drm/i915/gem/selftests/i915_gem_coherency.c | 379 +++ .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 1736 ++++++++++++ .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 386 +++ drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +- .../gpu/drm/i915/gem/selftests/i915_gem_object.c | 99 + drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 34 + drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 17 + drivers/gpu/drm/i915/gem/selftests/mock_context.c | 111 + drivers/gpu/drm/i915/gem/selftests/mock_context.h | 24 + drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c | 144 + drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h | 22 + .../gpu/drm/i915/gem/selftests/mock_gem_object.h | 14 + drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 + drivers/gpu/drm/i915/gt/intel_lrc.c | 2 + drivers/gpu/drm/i915/gt/intel_lrc.h | 14 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 3 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + drivers/gpu/drm/i915/gt/mock_engine.c | 3 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 7 +- drivers/gpu/drm/i915/gt/selftest_workarounds.c | 6 +- drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + drivers/gpu/drm/i915/gvt/scheduler.c | 5 +- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 11 +- drivers/gpu/drm/i915/i915_gem_clflush.c | 178 -- drivers/gpu/drm/i915/i915_gem_clflush.h | 36 - drivers/gpu/drm/i915/i915_gem_context.c | 2474 ----------------- drivers/gpu/drm/i915/i915_gem_context.h | 258 -- drivers/gpu/drm/i915/i915_gem_context_types.h | 208 -- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 337 --- drivers/gpu/drm/i915/i915_gem_evict.c | 2 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2788 -------------------- drivers/gpu/drm/i915/i915_gem_internal.c | 207 -- 
drivers/gpu/drm/i915/i915_gem_pm.c | 251 -- drivers/gpu/drm/i915/i915_gem_pm.h | 25 - drivers/gpu/drm/i915/i915_gem_shrinker.c | 574 ---- drivers/gpu/drm/i915/i915_gem_stolen.c | 721 ----- drivers/gpu/drm/i915/i915_gem_tiling.c | 460 ---- drivers/gpu/drm/i915/i915_gem_userptr.c | 851 ------ drivers/gpu/drm/i915/i915_gemfs.c | 75 - drivers/gpu/drm/i915/i915_gemfs.h | 34 - drivers/gpu/drm/i915/i915_globals.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 2 + drivers/gpu/drm/i915/i915_perf.c | 2 + drivers/gpu/drm/i915/i915_request.c | 3 + drivers/gpu/drm/i915/intel_display.c | 1 - drivers/gpu/drm/i915/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/intel_overlay.c | 2 + drivers/gpu/drm/i915/selftests/huge_gem_object.c | 139 - drivers/gpu/drm/i915/selftests/huge_gem_object.h | 45 - drivers/gpu/drm/i915/selftests/huge_pages.c | 1793 ------------- drivers/gpu/drm/i915/selftests/i915_active.c | 4 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 8 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 397 --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 1752 ------------ drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 404 --- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 8 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 117 - drivers/gpu/drm/i915/selftests/i915_request.c | 6 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 5 +- drivers/gpu/drm/i915/selftests/igt_flush_test.c | 6 +- drivers/gpu/drm/i915/selftests/igt_gem_utils.c | 34 - drivers/gpu/drm/i915/selftests/igt_gem_utils.h | 17 - drivers/gpu/drm/i915/selftests/igt_spinner.c | 3 +- drivers/gpu/drm/i915/selftests/igt_spinner.h | 9 +- drivers/gpu/drm/i915/selftests/intel_guc.c | 3 +- drivers/gpu/drm/i915/selftests/mock_context.c | 129 - drivers/gpu/drm/i915/selftests/mock_context.h | 42 - drivers/gpu/drm/i915/selftests/mock_dmabuf.c | 162 -- drivers/gpu/drm/i915/selftests/mock_dmabuf.h | 41 - drivers/gpu/drm/i915/selftests/mock_gem_device.c | 5 +- drivers/gpu/drm/i915/selftests/mock_gem_object.h | 9 - drivers/gpu/drm/i915/selftests/mock_request.c | 2 +- 102 files changed, 14267 insertions(+), 14626 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_clflush.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_context_types.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_internal.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pm.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_pm.h create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_stolen.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_tiling.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_userptr.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gemfs.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/huge_pages.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c create mode 100644 
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_context.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h create mode 100644 drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_context.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_context.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_context_types.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_execbuffer.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_pm.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_pm.h delete mode 100644 drivers/gpu/drm/i915/i915_gem_shrinker.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_stolen.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_tiling.c delete mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.c delete mode 100644 drivers/gpu/drm/i915/i915_gemfs.h delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.c delete mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h delete mode 100644 drivers/gpu/drm/i915/selftests/huge_pages.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_context.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_object.c delete mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.c delete mode 100644 drivers/gpu/drm/i915/selftests/igt_gem_utils.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h delete mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_object.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5ffd7e9b19ad..3f3d378f467d 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -87,33 +87,33 @@ i915-y += $(gt-y) # GEM (Graphics Execution Management) code obj-y += gem/ gem-y += \ + gem/i915_gem_clflush.o \ + gem/i915_gem_context.o \ + gem/i915_gem_dmabuf.o \ gem/i915_gem_domain.o \ + gem/i915_gem_execbuffer.o \ + gem/i915_gem_internal.o \ gem/i915_gem_object.o \ gem/i915_gem_mman.o \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ - gem/i915_gem_shmem.o + gem/i915_gem_pm.o \ + gem/i915_gem_shmem.o \ + gem/i915_gem_shrinker.o \ + gem/i915_gem_stolen.o \ + gem/i915_gem_tiling.o \ + gem/i915_gem_userptr.o \ + gem/i915_gemfs.o i915-y += \ $(gem-y) \ i915_active.o \ i915_cmd_parser.o \ i915_gem_batch_pool.o \ - i915_gem_clflush.o \ - i915_gem_context.o \ - i915_gem_dmabuf.o \ i915_gem_evict.o \ - i915_gem_execbuffer.o \ i915_gem_fence_reg.o \ i915_gem_gtt.o \ - i915_gem_internal.o \ 
i915_gem.o \ - i915_gem_pm.o \ i915_gem_render_state.o \ - i915_gem_shrinker.o \ - i915_gem_stolen.o \ - i915_gem_tiling.o \ - i915_gem_userptr.o \ - i915_gemfs.o \ i915_globals.o \ i915_query.o \ i915_request.o \ @@ -199,10 +199,10 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ + gem/selftests/igt_gem_utils.o \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ - selftests/igt_gem_utils.o \ selftests/igt_live_test.o \ selftests/igt_reset.o \ selftests/igt_spinner.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index 3a9663002d4a..e01cd91dc1c8 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -6,8 +6,6 @@ header_test := \ i915_active_types.h \ i915_debugfs.h \ i915_drv.h \ - i915_gem_context_types.h \ - i915_gem_pm.h \ i915_irq.h \ i915_params.h \ i915_priolist_types.h \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c new file mode 100644 index 000000000000..45d238d784fc --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "intel_frontbuffer.h" + +static DEFINE_SPINLOCK(clflush_lock); + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); + drm_clflush_sg(obj->mm.pages); + intel_fb_obj_flush(obj, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always 
coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) { + obj->cache_dirty = false; + return false; + } + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && + obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) + return false; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + GEM_BUG_ON(!obj->cache_dirty); + + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + to_i915(obj->base.dev)->mm.unordered_timeline, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + true, I915_FENCE_TIMEOUT, + I915_FENCE_GFP); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); + } + + obj->cache_dirty = false; + return true; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h new file mode 100644 index 000000000000..e6c382973129 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +#include + +struct drm_i915_private; +struct drm_i915_gem_object; + +bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c new file mode 100644 index 000000000000..5dcdf6540f43 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -0,0 +1,2453 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2011-2012 Intel Corporation + */ + +/* + * This file implements HW context support. On gen5+ a HW context consists of an + * opaque GPU object which is referenced at times of context saves and restores. + * With RC6 enabled, the context is also referenced as the GPU enters and exits + * from RC6 (GPU has its own internal power context, except on gen5). Though + * something like a context does exist for the media ring, the code only + * supports contexts for the render ring. + * + * In software, there is a distinction between contexts created by the user, + * and the default HW context. The default HW context is used by GPU clients + * that do not request setup of their own hardware context.
The default + * context's state is never restored to help prevent programming errors. This + * would happen if a client ran and piggy-backed off another client's GPU state. + * The default context only exists to give the GPU some offset to load as the + * current to invoke a save of the context we actually care about. In fact, the + * code could likely be constructed, albeit in a more complicated fashion, to + * never use the default context, though that limits the driver's ability to + * swap out, and/or destroy other contexts. + * + * All other contexts are created as a request by the GPU client. These contexts + * store GPU state, and thus allow GPU clients to not re-emit state (and + * potentially query certain state) at any time. The kernel driver makes + * certain that the appropriate commands are inserted. + * + * The context life cycle is semi-complicated in that context BOs may live + * longer than the context itself because of the way the hardware and object + * tracking work. Below is a very crude representation of the state machine + * describing the context life. + * refcount pincount active + * S0: initial state 0 0 0 + * S1: context created 1 0 0 + * S2: context is currently running 2 1 X + * S3: GPU referenced, but not current 2 0 1 + * S4: context is current, but destroyed 1 1 0 + * S5: like S3, but destroyed 1 0 1 + * + * The most common (but not all) transitions: + * S0->S1: client creates a context + * S1->S2: client submits execbuf with context + * S2->S3: another client submits execbuf with context + * S3->S1: context object was retired + * S3->S2: client submits another execbuf + * S2->S4: context destroy called with current context + * S3->S5->S0: destroy path + * S4->S5->S0: destroy path on current context + * + * There are two confusing terms used above: + * The "current context" means the context which is currently running on the + * GPU. The GPU has loaded its state already and has stored away the gtt + * offset of the BO. The GPU is not actively referencing the data at this + * offset, but it will on the next context switch. The only way to avoid this + * is to do a GPU reset. + * + * An "active context" is one which was previously the "current context" and is + * on the active list waiting for the next context switch to occur. Until this + * happens, the object must remain at the same gtt offset. It is therefore + * possible to destroy a context while it is still active. + * + */
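As a rough illustration of the bookkeeping described above, here is a hypothetical sketch (these are not the driver's actual helpers; the real code tracks the triple via kref, hw_id_pin_count and the active list) of how (refcount, pincount, active) evolves along the common transitions:

	struct ctx_state { int refcount, pincount, active; };

	static void lifecycle_sketch(void)
	{
		struct ctx_state s = { 0, 0, 0 };	/* S0: initial state */

		s.refcount = 1;				/* S0->S1: client creates a context */
		s.refcount++; s.pincount++;		/* S1->S2: execbuf makes it current */
		s.pincount--; s.active = 1;		/* S2->S3: another context becomes current */
		s.refcount--;				/* S3->S5: destroyed, but still active */
		s.active = 0; s.refcount--;		/* S5->S0: retired; last reference gone */
	}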
+ +#include +#include + +#include + +#include "gt/intel_lrc_reg.h" + +#include "i915_gem_context.h" +#include "i915_globals.h" +#include "i915_trace.h" +#include "i915_user_extensions.h" + +#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 + +static struct i915_global_gem_context { + struct i915_global base; + struct kmem_cache *slab_luts; +} global; + +struct i915_lut_handle *i915_lut_handle_alloc(void) +{ + return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); +} + +void i915_lut_handle_free(struct i915_lut_handle *lut) +{ + return kmem_cache_free(global.slab_luts, lut); +} + +static void lut_close(struct i915_gem_context *ctx) +{ + struct i915_lut_handle *lut, *ln; + struct radix_tree_iter iter; + void __rcu **slot; + + list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { + list_del(&lut->obj_link); + i915_lut_handle_free(lut); + } + INIT_LIST_HEAD(&ctx->handles_list); + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { + struct i915_vma *vma = rcu_dereference_raw(*slot); + + radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); + + vma->open_count--; + __i915_gem_object_release_unless_active(vma->obj); + } + rcu_read_unlock(); +} + +static struct intel_context * +lookup_user_engine(struct i915_gem_context *ctx, + unsigned long flags, + const struct i915_engine_class_instance *ci) +#define LOOKUP_USER_INDEX BIT(0) +{ + int idx; + + if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx)) + return ERR_PTR(-EINVAL); + + if (!i915_gem_context_user_engines(ctx)) { + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(ctx->i915, + ci->engine_class, + ci->engine_instance); + if (!engine) + return ERR_PTR(-EINVAL); + + idx = engine->id; + } else { + idx = ci->engine_instance; + } + + return i915_gem_context_get_engine(ctx, idx); +} + +static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) +{ + unsigned int max; + + lockdep_assert_held(&i915->contexts.mutex); + + if (INTEL_GEN(i915) >= 11) + max = GEN11_MAX_CONTEXT_HW_ID; + else if (USES_GUC_SUBMISSION(i915)) + /* + * When using GuC in proxy submission, GuC consumes the + * highest bit in the context id to indicate proxy submission. + */ + max = MAX_GUC_CONTEXT_HW_ID; + else + max = MAX_CONTEXT_HW_ID; + + return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); +} + +static int steal_hw_id(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx, *cn; + LIST_HEAD(pinned); + int id = -ENOSPC; + + lockdep_assert_held(&i915->contexts.mutex); + + list_for_each_entry_safe(ctx, cn, + &i915->contexts.hw_id_list, hw_id_link) { + if (atomic_read(&ctx->hw_id_pin_count)) { + list_move_tail(&ctx->hw_id_link, &pinned); + continue; + } + + GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ + list_del_init(&ctx->hw_id_link); + id = ctx->hw_id; + break; + } + + /* + * Remember how far we got up on the last repossession scan, so the + * list is kept in a "least recently scanned" order. + */ + list_splice_tail(&pinned, &i915->contexts.hw_id_list); + return id; +} + +static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) +{ + int ret; + + lockdep_assert_held(&i915->contexts.mutex); + + /* + * We prefer to steal/stall ourselves and our users over that of the + * entire system. That may be a little unfair to our users, and + * even hurt high priority clients. The choice is whether to oomkill + * something else, or steal a context id.
+ */ + ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(ret < 0)) { + ret = steal_hw_id(i915); + if (ret < 0) /* once again for the correct errno code */ + ret = new_hw_id(i915, GFP_KERNEL); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + +static void release_hw_id(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + + if (list_empty(&ctx->hw_id_link)) + return; + + mutex_lock(&i915->contexts.mutex); + if (!list_empty(&ctx->hw_id_link)) { + ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); + list_del_init(&ctx->hw_id_link); + } + mutex_unlock(&i915->contexts.mutex); +} + +static void __free_engines(struct i915_gem_engines *e, unsigned int count) +{ + while (count--) { + if (!e->engines[count]) + continue; + + intel_context_put(e->engines[count]); + } + kfree(e); +} + +static void free_engines(struct i915_gem_engines *e) +{ + __free_engines(e, e->num_engines); +} + +static void free_engines_rcu(struct work_struct *wrk) +{ + struct i915_gem_engines *e = + container_of(wrk, struct i915_gem_engines, rcu.work); + struct drm_i915_private *i915 = e->i915; + + mutex_lock(&i915->drm.struct_mutex); + free_engines(e); + mutex_unlock(&i915->drm.struct_mutex); +} + +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +{ + struct intel_engine_cs *engine; + struct i915_gem_engines *e; + enum intel_engine_id id; + + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + e->i915 = ctx->i915; + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce; + + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + __free_engines(e, id); + return ERR_CAST(ce); + } + + e->engines[id] = ce; + } + e->num_engines = id; + + return e; +} + +static void i915_gem_context_free(struct i915_gem_context *ctx) +{ + lockdep_assert_held(&ctx->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + + release_hw_id(ctx); + i915_ppgtt_put(ctx->ppgtt); + + free_engines(rcu_access_pointer(ctx->engines)); + mutex_destroy(&ctx->engines_mutex); + + if (ctx->timeline) + i915_timeline_put(ctx->timeline); + + kfree(ctx->name); + put_pid(ctx->pid); + + list_del(&ctx->link); + mutex_destroy(&ctx->mutex); + + kfree_rcu(ctx, rcu); +} + +static void contexts_free(struct drm_i915_private *i915) +{ + struct llist_node *freed = llist_del_all(&i915->contexts.free_list); + struct i915_gem_context *ctx, *cn; + + lockdep_assert_held(&i915->drm.struct_mutex); + + llist_for_each_entry_safe(ctx, cn, freed, free_link) + i915_gem_context_free(ctx); +} + +static void contexts_free_first(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx; + struct llist_node *freed; + + lockdep_assert_held(&i915->drm.struct_mutex); + + freed = llist_del_first(&i915->contexts.free_list); + if (!freed) + return; + + ctx = container_of(freed, typeof(*ctx), free_link); + i915_gem_context_free(ctx); +} + +static void contexts_free_worker(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), contexts.free_work); + + mutex_lock(&i915->drm.struct_mutex); + contexts_free(i915); + mutex_unlock(&i915->drm.struct_mutex); +} + +void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct drm_i915_private *i915 = ctx->i915; + + trace_i915_context_free(ctx); + if (llist_add(&ctx->free_link, &i915->contexts.free_list)) + queue_work(i915->wq, 
&i915->contexts.free_work); +} + +static void context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + /* + * This context will never again be assigned to HW, so we can + * reuse its ID for the next context. + */ + release_hw_id(ctx); + + /* + * The LUT uses the VMA as a backpointer to unref the object, + * so we need to clear the LUT before we close all the VMA (inside + * the ppgtt). + */ + lut_close(ctx); + + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_put(ctx); +} + +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) +{ + u32 address_mode; + u32 desc; + + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_4lvl(&ppgtt->vm)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN(i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + +static struct i915_gem_context * +__create_context(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int err; + int i; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + kref_init(&ctx->ref); + list_add_tail(&ctx->link, &dev_priv->contexts.list); + ctx->i915 = dev_priv; + ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); + mutex_init(&ctx->mutex); + + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_free; + } + RCU_INIT_POINTER(ctx->engines, e); + + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); + INIT_LIST_HEAD(&ctx->hw_id_link); + + /* NB: Mark all slices as needing a remap so that when the context first + * loads it will restore whatever remap state already exists. If there + * is no remap info, it will be a NOP.
*/ + ctx->remap_slice = ALL_L3_SLICES(dev_priv); + + i915_gem_context_set_bannable(ctx); + i915_gem_context_set_recoverable(ctx); + + ctx->ring_size = 4 * PAGE_SIZE; + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); + + for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) + ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + + return ctx; + +err_free: + kfree(ctx); + return ERR_PTR(err); +} + +static struct i915_hw_ppgtt * +__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt) +{ + struct i915_hw_ppgtt *old = ctx->ppgtt; + + ctx->ppgtt = i915_ppgtt_get(ppgtt); + ctx->desc_template = default_desc_template(ctx->i915, ppgtt); + + return old; +} + +static void __assign_ppgtt(struct i915_gem_context *ctx, + struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt == ctx->ppgtt) + return; + + ppgtt = __set_ppgtt(ctx, ppgtt); + if (ppgtt) + i915_ppgtt_put(ppgtt); +} + +static struct i915_gem_context * +i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +{ + struct i915_gem_context *ctx; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && + !HAS_EXECLISTS(dev_priv)) + return ERR_PTR(-EINVAL); + + /* Reap the most stale context */ + contexts_free_first(dev_priv); + + ctx = __create_context(dev_priv); + if (IS_ERR(ctx)) + return ctx; + + if (HAS_FULL_PPGTT(dev_priv)) { + struct i915_hw_ppgtt *ppgtt; + + ppgtt = i915_ppgtt_create(dev_priv); + if (IS_ERR(ppgtt)) { + DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", + PTR_ERR(ppgtt)); + context_close(ctx); + return ERR_CAST(ppgtt); + } + + __assign_ppgtt(ctx, ppgtt); + i915_ppgtt_put(ppgtt); + } + + if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { + struct i915_timeline *timeline; + + timeline = i915_timeline_create(dev_priv, NULL); + if (IS_ERR(timeline)) { + context_close(ctx); + return ERR_CAST(timeline); + } + + ctx->timeline = timeline; + } + + trace_i915_context_create(ctx); + + return ctx; +} + +/** + * i915_gem_context_create_gvt - create a GVT GEM context + * @dev: drm device * + * + * This function is used to create a GVT specific GEM context. 
+ * + * Returns: + * pointer to i915_gem_context on success, error pointer if failed + * + */ +struct i915_gem_context * +i915_gem_context_create_gvt(struct drm_device *dev) +{ + struct i915_gem_context *ctx; + int ret; + + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return ERR_PTR(-ENODEV); + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ERR_PTR(ret); + + ctx = i915_gem_create_context(to_i915(dev), 0); + if (IS_ERR(ctx)) + goto out; + + ret = i915_gem_context_pin_hw_id(ctx); + if (ret) { + context_close(ctx); + ctx = ERR_PTR(ret); + goto out; + } + + ctx->file_priv = ERR_PTR(-EBADF); + i915_gem_context_set_closed(ctx); /* not user accessible */ + i915_gem_context_clear_bannable(ctx); + i915_gem_context_set_force_single_submission(ctx); + if (!USES_GUC_SUBMISSION(to_i915(dev))) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); +out: + mutex_unlock(&dev->struct_mutex); + return ctx; +} + +static void +destroy_kernel_context(struct i915_gem_context **ctxp) +{ + struct i915_gem_context *ctx; + + /* Keep the context ref so that we can free it immediately ourselves */ + ctx = i915_gem_context_get(fetch_and_zero(ctxp)); + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + context_close(ctx); + i915_gem_context_free(ctx); +} + +struct i915_gem_context * +i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) +{ + struct i915_gem_context *ctx; + int err; + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + err = i915_gem_context_pin_hw_id(ctx); + if (err) { + destroy_kernel_context(&ctx); + return ERR_PTR(err); + } + + i915_gem_context_clear_bannable(ctx); + ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->ring_size = PAGE_SIZE; + + GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); + + return ctx; +} + +static void init_contexts(struct drm_i915_private *i915) +{ + mutex_init(&i915->contexts.mutex); + INIT_LIST_HEAD(&i915->contexts.list); + + /* Using the simple ida interface, the max is limited by sizeof(int) */ + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); + ida_init(&i915->contexts.hw_ida); + INIT_LIST_HEAD(&i915->contexts.hw_id_list); + + INIT_WORK(&i915->contexts.free_work, contexts_free_worker); + init_llist_head(&i915->contexts.free_list); +} + +static bool needs_preempt_context(struct drm_i915_private *i915) +{ + return HAS_EXECLISTS(i915); +} + +int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +{ + struct i915_gem_context *ctx; + + /* Reassure ourselves we are only called once */ + GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(dev_priv->preempt_context); + + intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); + init_contexts(dev_priv); + + /* lowest priority; idle task */ + ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + if (IS_ERR(ctx)) { + DRM_ERROR("Failed to create default global context\n"); + return PTR_ERR(ctx); + } + /* + * For easy recognisability, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. Kernel contexts are + * permanently pinned, so that we never suffer a stall and can + * use them from any allocation context (e.g. for evicting other + * contexts and from inside the shrinker).
+ */ + GEM_BUG_ON(ctx->hw_id); + GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); + dev_priv->kernel_context = ctx; + + /* highest priority; preempting task */ + if (needs_preempt_context(dev_priv)) { + ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); + if (!IS_ERR(ctx)) + dev_priv->preempt_context = ctx; + else + DRM_ERROR("Failed to create preempt context; disabling preemption\n"); + } + + DRM_DEBUG_DRIVER("%s context support initialized\n", + DRIVER_CAPS(dev_priv)->has_logical_contexts ? + "logical" : "fake"); + return 0; +} + +void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + intel_engine_lost_context(engine); +} + +void i915_gem_contexts_fini(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + if (i915->preempt_context) + destroy_kernel_context(&i915->preempt_context); + destroy_kernel_context(&i915->kernel_context); + + /* Must free all deferred contexts (via flush_workqueue) first */ + GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); + ida_destroy(&i915->contexts.hw_ida); +} + +static int context_idr_cleanup(int id, void *p, void *data) +{ + context_close(p); + return 0; +} + +static int vm_idr_cleanup(int id, void *p, void *data) +{ + i915_ppgtt_put(p); + return 0; +} + +static int gem_context_register(struct i915_gem_context *ctx, + struct drm_i915_file_private *fpriv) +{ + int ret; + + ctx->file_priv = fpriv; + if (ctx->ppgtt) + ctx->ppgtt->vm.file = fpriv; + + ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", + current->comm, pid_nr(ctx->pid)); + if (!ctx->name) { + ret = -ENOMEM; + goto err_pid; + } + + /* And finally expose ourselves to userspace via the idr */ + mutex_lock(&fpriv->context_idr_lock); + ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&fpriv->context_idr_lock); + if (ret >= 0) + goto out; + + kfree(fetch_and_zero(&ctx->name)); +err_pid: + put_pid(fetch_and_zero(&ctx->pid)); +out: + return ret; +} + +int i915_gem_context_open(struct drm_i915_private *i915, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_context *ctx; + int err; + + mutex_init(&file_priv->context_idr_lock); + mutex_init(&file_priv->vm_idr_lock); + + idr_init(&file_priv->context_idr); + idr_init_base(&file_priv->vm_idr, 1); + + mutex_lock(&i915->drm.struct_mutex); + ctx = i915_gem_create_context(i915, 0); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err; + } + + err = gem_context_register(ctx, file_priv); + if (err < 0) + goto err_ctx; + + GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); + GEM_BUG_ON(err > 0); + + return 0; + +err_ctx: + mutex_lock(&i915->drm.struct_mutex); + context_close(ctx); + mutex_unlock(&i915->drm.struct_mutex); +err: + idr_destroy(&file_priv->vm_idr); + idr_destroy(&file_priv->context_idr); + mutex_destroy(&file_priv->vm_idr_lock); + mutex_destroy(&file_priv->context_idr_lock); + return err; +} + +void i915_gem_context_close(struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex); + + idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); + idr_destroy(&file_priv->context_idr); + mutex_destroy(&file_priv->context_idr_lock); + + idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, 
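The registration path above backs the context-create ioctl. As a rough userspace usage sketch (a valid i915 DRM fd is assumed, the destroy ioctl itself lives outside this hunk, and error handling is elided):

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	struct drm_i915_gem_context_create create = {};
	struct drm_i915_gem_context_destroy destroy = {};

	/* Allocates a context; create.ctx_id is what execbuf and the
	 * setparam/getparam ioctls then take. */
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);

	/* Releasing the handle eventually runs context_close(). */
	destroy.ctx_id = create.ctx_id;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);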
NULL); + idr_destroy(&file_priv->vm_idr); + mutex_destroy(&file_priv->vm_idr_lock); +} + +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_vm_control *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_hw_ppgtt *ppgtt; + int err; + + if (!HAS_FULL_PPGTT(i915)) + return -ENODEV; + + if (args->flags) + return -EINVAL; + + ppgtt = i915_ppgtt_create(i915); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); + + ppgtt->vm.file = file_priv; + + if (args->extensions) { + err = i915_user_extensions(u64_to_user_ptr(args->extensions), + NULL, 0, + ppgtt); + if (err) + goto err_put; + } + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + goto err_put; + + err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + if (err < 0) + goto err_unlock; + + GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ + + mutex_unlock(&file_priv->vm_idr_lock); + + args->vm_id = err; + return 0; + +err_unlock: + mutex_unlock(&file_priv->vm_idr_lock); +err_put: + i915_ppgtt_put(ppgtt); + return err; +} + +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_vm_control *args = data; + struct i915_hw_ppgtt *ppgtt; + int err; + u32 id; + + if (args->flags) + return -EINVAL; + + if (args->extensions) + return -EINVAL; + + id = args->vm_id; + if (!id) + return -ENOENT; + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + return err; + + ppgtt = idr_remove(&file_priv->vm_idr, id); + + mutex_unlock(&file_priv->vm_idr_lock); + if (!ppgtt) + return -ENOENT; + + i915_ppgtt_put(ppgtt); + return 0; +} + +struct context_barrier_task { + struct i915_active base; + void (*task)(void *data); + void *data; +}; + +static void cb_retire(struct i915_active *base) +{ + struct context_barrier_task *cb = container_of(base, typeof(*cb), base); + + if (cb->task) + cb->task(cb->data); + + i915_active_fini(&cb->base); + kfree(cb); +} + +I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); +static int context_barrier_task(struct i915_gem_context *ctx, + intel_engine_mask_t engines, + int (*emit)(struct i915_request *rq, void *data), + void (*task)(void *data), + void *data) +{ + struct drm_i915_private *i915 = ctx->i915; + struct context_barrier_task *cb; + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!task); + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + + i915_active_init(i915, &cb->base, cb_retire); + i915_active_acquire(&cb->base); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + struct i915_request *rq; + + if (I915_SELFTEST_ONLY(context_barrier_inject_fault & + ce->engine->mask)) { + err = -ENXIO; + break; + } + + if (!(ce->engine->mask & engines) || !ce->state) + continue; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = 0; + if (emit) + err = emit(rq, data); + if (err == 0) + err = i915_active_ref(&cb->base, rq->fence.context, rq); + + i915_request_add(rq); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); + + cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ + cb->data = data; + + i915_active_release(&cb->base); + + return err; +} + +static int get_ppgtt(struct drm_i915_file_private *file_priv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + if (!ctx->ppgtt) + return -ENODEV; + + /* XXX rcu acquire? */ + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (ret) + return ret; + + ppgtt = i915_ppgtt_get(ctx->ppgtt); + mutex_unlock(&ctx->i915->drm.struct_mutex); + + ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (ret) + goto err_put; + + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + GEM_BUG_ON(!ret); + if (ret < 0) + goto err_unlock; + + i915_ppgtt_get(ppgtt); + + args->size = 0; + args->value = ret; + + ret = 0; +err_unlock: + mutex_unlock(&file_priv->vm_idr_lock); +err_put: + i915_ppgtt_put(ppgtt); + return ret; +} + +static void set_ppgtt_barrier(void *data) +{ + struct i915_hw_ppgtt *old = data; + + if (INTEL_GEN(old->vm.i915) < 8) + gen6_ppgtt_unpin_all(old); + + i915_ppgtt_put(old); +} + +static int emit_ppgtt_update(struct i915_request *rq, void *data) +{ + struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; + struct intel_engine_cs *engine = rq->engine; + u32 base = engine->mmio_base; + u32 *cs; + int i; + + if (i915_vm_is_4lvl(&ppgtt->vm)) { + const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); + *cs++ = lower_32_bits(pd_daddr); + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + } else { + /* ppGTT is not part of the legacy context image */ + gen6_ppgtt_pin(ppgtt); + } + + return 0; +} + +static int set_ppgtt(struct drm_i915_file_private *file_priv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_hw_ppgtt *ppgtt, *old; + int err; + + if (args->size) + return -EINVAL; + + if (!ctx->ppgtt) + return -ENODEV; + + if (upper_32_bits(args->value)) + return -ENOENT; + + err = mutex_lock_interruptible(&file_priv->vm_idr_lock); + if (err) + return err; + + ppgtt = idr_find(&file_priv->vm_idr, args->value); + if (ppgtt) + i915_ppgtt_get(ppgtt); + mutex_unlock(&file_priv->vm_idr_lock); + if (!ppgtt) + return -ENOENT; + + err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + if (err) + goto out; + + if (ppgtt == ctx->ppgtt) + goto unlock; + + /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ + lut_close(ctx); + + old = __set_ppgtt(ctx, ppgtt); + + /* + * We need to flush any requests using the current ppgtt before + * we release it as the requests do not hold a reference themselves, + * only indirectly through the context. 
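The vm_id minted by i915_gem_vm_create_ioctl() above is meant to be passed back through I915_CONTEXT_PARAM_VM, which lands in set_ppgtt() below. A sketch of the userspace side (fd and ctx_id assumed valid, errors elided):

	struct drm_i915_gem_vm_control vm = {};	/* flags and extensions must be zero */
	struct drm_i915_gem_context_param arg = {};

	ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);	/* on success, vm.vm_id != 0 */

	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_VM;
	arg.size = 0;		/* set_ppgtt() rejects a non-zero size */
	arg.value = vm.vm_id;
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);

	/* The context holds its own reference on the ppgtt, so the id can go. */
	ioctl(fd, DRM_IOCTL_I915_GEM_VM_DESTROY, &vm);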
+ */ + err = context_barrier_task(ctx, ALL_ENGINES, + emit_ppgtt_update, + set_ppgtt_barrier, + old); + if (err) { + ctx->ppgtt = old; + ctx->desc_template = default_desc_template(ctx->i915, old); + i915_ppgtt_put(ppgtt); + } + +unlock: + mutex_unlock(&ctx->i915->drm.struct_mutex); + +out: + i915_ppgtt_put(ppgtt); + return err; +} + +static int gen8_emit_rpcs_config(struct i915_request *rq, + struct intel_context *ce, + struct intel_sseu sseu) +{ + u64 offset; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + offset = i915_ggtt_offset(ce->state) + + LRC_STATE_PN * PAGE_SIZE + + (CTX_R_PWR_CLK_STATE + 1) * 4; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) +{ + struct i915_request *rq; + int ret; + + lockdep_assert_held(&ce->pin_mutex); + + /* + * If the context is not idle, we have to submit an ordered request to + * modify its context image via the kernel context (writing to our own + * image, or into the registers directly, does not stick). Pristine + * and idle contexts will be configured on pinning. + */ + if (!intel_context_is_pinned(ce)) + return 0; + + rq = i915_request_create(ce->engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + /* Queue this switch after all other activity by this context. */ + ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); + if (ret) + goto out_add; + + ret = gen8_emit_rpcs_config(rq, ce, sseu); + if (ret) + goto out_add; + + /* + * Guarantee context image and the timeline remain pinned until the + * modifying request is retired by setting the ce activity tracker. + * + * But we only need to take one pin on account of it. Or in other + * words transfer the pinned ce object to tracked active request. + */ + if (!i915_active_request_isset(&ce->active_tracker)) + __intel_context_pin(ce); + __i915_active_request_set(&ce->active_tracker, rq); + +out_add: + i915_request_add(rq); + return ret; +} + +static int +__intel_context_reconfigure_sseu(struct intel_context *ce, + struct intel_sseu sseu) +{ + int ret; + + GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); + + ret = intel_context_lock_pinned(ce); + if (ret) + return ret; + + /* Nothing to do if unmodified. */ + if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) + goto unlock; + + ret = gen8_modify_rpcs(ce, sseu); + if (!ret) + ce->sseu = sseu; + +unlock: + intel_context_unlock_pinned(ce); + return ret; +} + +static int +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) +{ + struct drm_i915_private *i915 = ce->gem_context->i915; + int ret; + + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (ret) + return ret; + + ret = __intel_context_reconfigure_sseu(ce, sseu); + + mutex_unlock(&i915->drm.struct_mutex); + + return ret; +} + +static int +user_to_context_sseu(struct drm_i915_private *i915, + const struct drm_i915_gem_context_param_sseu *user, + struct intel_sseu *context) +{ + const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; + + /* No zeros in any field. */ + if (!user->slice_mask || !user->subslice_mask || + !user->min_eus_per_subslice || !user->max_eus_per_subslice) + return -EINVAL; + + /* Max >= min.
*/ + if (user->max_eus_per_subslice < user->min_eus_per_subslice) + return -EINVAL; + + /* + * Some future proofing on the types since the uAPI is wider than the + * current internal implementation. + */ + if (overflows_type(user->slice_mask, context->slice_mask) || + overflows_type(user->subslice_mask, context->subslice_mask) || + overflows_type(user->min_eus_per_subslice, + context->min_eus_per_subslice) || + overflows_type(user->max_eus_per_subslice, + context->max_eus_per_subslice)) + return -EINVAL; + + /* Check validity against hardware. */ + if (user->slice_mask & ~device->slice_mask) + return -EINVAL; + + if (user->subslice_mask & ~device->subslice_mask[0]) + return -EINVAL; + + if (user->max_eus_per_subslice > device->max_eus_per_subslice) + return -EINVAL; + + context->slice_mask = user->slice_mask; + context->subslice_mask = user->subslice_mask; + context->min_eus_per_subslice = user->min_eus_per_subslice; + context->max_eus_per_subslice = user->max_eus_per_subslice; + + /* Part specific restrictions. */ + if (IS_GEN(i915, 11)) { + unsigned int hw_s = hweight8(device->slice_mask); + unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); + unsigned int req_s = hweight8(context->slice_mask); + unsigned int req_ss = hweight8(context->subslice_mask); + + /* + * Only full subslice enablement is possible if more than one + * slice is turned on. + */ + if (req_s > 1 && req_ss != hw_ss_per_s) + return -EINVAL; + + /* + * If more than four (SScount bitfield limit) subslices are + * requested then the number has to be even. + */ + if (req_ss > 4 && (req_ss & 1)) + return -EINVAL; + + /* + * If only one slice is enabled and subslice count is below the + * device full enablement, it must be at most half of the all + * available subslices. + */ + if (req_s == 1 && req_ss < hw_ss_per_s && + req_ss > (hw_ss_per_s / 2)) + return -EINVAL; + + /* ABI restriction - VME use case only. */ + + /* All slices or one slice only. */ + if (req_s != 1 && req_s != hw_s) + return -EINVAL; + + /* + * Half subslices or full enablement only when one slice is + * enabled. + */ + if (req_s == 1 && + (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) + return -EINVAL; + + /* No EU configuration changes. */ + if ((user->min_eus_per_subslice != + device->max_eus_per_subslice) || + (user->max_eus_per_subslice != + device->max_eus_per_subslice)) + return -EINVAL; + } + + return 0; +} + +static int set_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_context *ce; + struct intel_sseu sseu; + unsigned long lookup; + int ret; + + if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (!IS_GEN(i915, 11)) + return -ENODEV; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + /* Only render engine supports RPCS configuration. 
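These validation rules police values supplied through I915_CONTEXT_PARAM_SSEU. A hypothetical Gen11, VME-style request from userspace might look as follows (fd/ctx_id assumed; the masks and EU counts are placeholders that must match the device topology, or the checks above reject them):

	struct drm_i915_gem_context_param_sseu sseu = {
		.engine = {
			.engine_class = I915_ENGINE_CLASS_RENDER,
			.engine_instance = 0,
		},
		.slice_mask = 0x1,		/* one slice only (the VME use case) */
		.subslice_mask = 0xf,		/* placeholder: half or full enablement */
		.min_eus_per_subslice = 8,	/* placeholder: must equal the device max */
		.max_eus_per_subslice = 8,
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_SSEU,
		.size = sizeof(sseu),
		.value = (__u64)(uintptr_t)&sseu,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);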
*/ + if (ce->engine->class != RENDER_CLASS) { + ret = -ENODEV; + goto out_ce; + } + + ret = user_to_context_sseu(i915, &user_sseu, &sseu); + if (ret) + goto out_ce; + + ret = intel_context_reconfigure_sseu(ce, sseu); + if (ret) + goto out_ce; + + args->size = sizeof(user_sseu); + +out_ce: + intel_context_put(ce); + return ret; +} + +struct set_engines { + struct i915_gem_context *ctx; + struct i915_gem_engines *engines; +}; + +static int +set_engines__load_balance(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct intel_engine_cs *stack[16]; + struct intel_engine_cs **siblings; + struct intel_context *ce; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(set->ctx->i915)) + return -ENODEV; + + if (USES_GUC_SUBMISSION(set->ctx->i915)) + return -ENODEV; /* not implemented yet */ + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid placement value, %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (set->engines->engines[idx]) { + DRM_DEBUG("Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + siblings = stack; + if (num_siblings > ARRAY_SIZE(stack)) { + siblings = kmalloc_array(num_siblings, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return -ENOMEM; + } + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto out_siblings; + } + + siblings[n] = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto out_siblings; + } + } + + ce = intel_execlists_create_virtual(set->ctx, siblings, n); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_siblings; + } + + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { + intel_context_put(ce); + err = -EEXIST; + goto out_siblings; + } + +out_siblings: + if (siblings != stack) + kfree(siblings); + + return err; +} + +static int +set_engines__bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct i915_engine_class_instance ci; + struct intel_engine_cs *virtual; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (!set->engines->engines[idx]) { + DRM_DEBUG("Invalid engine at %d\n", idx); + return -EINVAL; + } + virtual = set->engines->engines[idx]->engine; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (copy_from_user(&ci,
&ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, ci.engine_instance); + if (!master) { + DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + /* + * A non-virtual engine has no siblings to choose between; and + * a submit fence will always be directed to the one engine. + */ + if (intel_engine_is_virtual(virtual)) { + err = intel_virtual_engine_attach_bond(virtual, + master, + bond); + if (err) + return err; + } + } + + return 0; +} + +static const i915_user_extension_fn set_engines__extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, +}; + +static int +set_engines(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + struct set_engines set = { .ctx = ctx }; + unsigned int num_engines, n; + u64 extensions; + int err; + + if (!args->size) { /* switch back to legacy user_ring_map */ + if (!i915_gem_context_user_engines(ctx)) + return 0; + + set.engines = default_engines(ctx); + if (IS_ERR(set.engines)) + return PTR_ERR(set.engines); + + goto replace; + } + + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size, sizeof(*user->engines))) { + DRM_DEBUG("Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + /* + * Note that I915_EXEC_RING_MASK limits execbuf to only using the + * first 64 engines defined here. 
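The engine map parsed here is supplied from userspace via I915_CONTEXT_PARAM_ENGINES. A minimal sketch using the uAPI helper macro (fd/ctx_id assumed, no extensions chained, a two-engine map):

	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = {
		.extensions = 0,
		.engines = {
			{ .engine_class = I915_ENGINE_CLASS_RENDER, .engine_instance = 0 },
			{ .engine_class = I915_ENGINE_CLASS_COPY, .engine_instance = 0 },
		},
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_ENGINES,
		.size = sizeof(engines),
		.value = (__u64)(uintptr_t)&engines,
	};

	/* Afterwards the execbuf ring selector indexes this map: 0 = rcs0, 1 = bcs0. */
	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);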
+ */ + num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + + set.engines = kmalloc(struct_size(set.engines, engines, num_engines), + GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + set.engines->i915 = ctx->i915; + for (n = 0; n < num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + __free_engines(set.engines, n); + return -EFAULT; + } + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { + set.engines->engines[n] = NULL; + continue; + } + + engine = intel_engine_lookup_user(ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + __free_engines(set.engines, n); + return -ENOENT; + } + + set.engines->engines[n] = intel_context_create(ctx, engine); + if (!set.engines->engines[n]) { + __free_engines(set.engines, n); + return -ENOMEM; + } + } + set.engines->num_engines = num_engines; + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_engines__extensions, + ARRAY_SIZE(set_engines__extensions), + &set); + if (err) { + free_engines(set.engines); + return err; + } + +replace: + mutex_lock(&ctx->engines_mutex); + if (args->size) + i915_gem_context_set_user_engines(ctx); + else + i915_gem_context_clear_user_engines(ctx); + rcu_swap_protected(ctx->engines, set.engines, 1); + mutex_unlock(&ctx->engines_mutex); + + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &set.engines->rcu); + + return 0; +} + +static struct i915_gem_engines * +__copy_engines(struct i915_gem_engines *e) +{ + struct i915_gem_engines *copy; + unsigned int n; + + copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!copy) + return ERR_PTR(-ENOMEM); + + copy->i915 = e->i915; + for (n = 0; n < e->num_engines; n++) { + if (e->engines[n]) + copy->engines[n] = intel_context_get(e->engines[n]); + else + copy->engines[n] = NULL; + } + copy->num_engines = n; + + return copy; +} + +static int +get_engines(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user; + struct i915_gem_engines *e; + size_t n, count, size; + int err = 0; + + err = mutex_lock_interruptible(&ctx->engines_mutex); + if (err) + return err; + + e = NULL; + if (i915_gem_context_user_engines(ctx)) + e = __copy_engines(i915_gem_context_engines(ctx)); + mutex_unlock(&ctx->engines_mutex); + if (IS_ERR_OR_NULL(e)) { + args->size = 0; + return PTR_ERR_OR_ZERO(e); + } + + count = e->num_engines; + + /* Be paranoid in case we have an impedance mismatch */ + if (!check_struct_size(user, engines, count, &size)) { + err = -EINVAL; + goto err_free; + } + if (overflows_type(size, args->size)) { + err = -EINVAL; + goto err_free; + } + + if (!args->size) { + args->size = size; + goto err_free; + } + + if (args->size < size) { + err = -EINVAL; + goto err_free; + } + + user = u64_to_user_ptr(args->value); + if (!access_ok(user, size)) { + err = -EFAULT; + goto err_free; + } + + if (put_user(0, &user->extensions)) { + err = -EFAULT; + goto err_free; + } + + for (n = 0; n < count; n++) { + struct i915_engine_class_instance ci = { + .engine_class = I915_ENGINE_CLASS_INVALID, + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, + }; + + if (e->engines[n]) { + ci.engine_class = 
e->engines[n]->engine->uabi_class; + ci.engine_instance = e->engines[n]->engine->instance; + } + + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { + err = -EFAULT; + goto err_free; + } + } + + args->size = size; + +err_free: + INIT_RCU_WORK(&e->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &e->rcu); + return err; +} + +static int ctx_setparam(struct drm_i915_file_private *fpriv, + struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + int ret = 0; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ZEROMAP: + if (args->size) + ret = -EINVAL; + else if (args->value) + set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + else + clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + break; + + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + if (args->size) + ret = -EINVAL; + else if (args->value) + i915_gem_context_set_no_error_capture(ctx); + else + i915_gem_context_clear_no_error_capture(ctx); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + if (args->size) + ret = -EINVAL; + else if (!capable(CAP_SYS_ADMIN) && !args->value) + ret = -EPERM; + else if (args->value) + i915_gem_context_set_bannable(ctx); + else + i915_gem_context_clear_bannable(ctx); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + if (args->size) + ret = -EINVAL; + else if (args->value) + i915_gem_context_set_recoverable(ctx); + else + i915_gem_context_clear_recoverable(ctx); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + { + s64 priority = args->value; + + if (args->size) + ret = -EINVAL; + else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + ret = -ENODEV; + else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || + priority < I915_CONTEXT_MIN_USER_PRIORITY) + ret = -EINVAL; + else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && + !capable(CAP_SYS_NICE)) + ret = -EPERM; + else + ctx->sched.priority = + I915_USER_PRIORITY(priority); + } + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = set_sseu(ctx, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = set_ppgtt(fpriv, ctx, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = set_engines(ctx, args); + break; + + case I915_CONTEXT_PARAM_BAN_PERIOD: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +struct create_ext { + struct i915_gem_context *ctx; + struct drm_i915_file_private *fpriv; +}; + +static int create_setparam(struct i915_user_extension __user *ext, void *data) +{ + struct drm_i915_gem_context_create_ext_setparam local; + const struct create_ext *arg = data; + + if (copy_from_user(&local, ext, sizeof(local))) + return -EFAULT; + + if (local.param.ctx_id) + return -EINVAL; + + return ctx_setparam(arg->fpriv, arg->ctx, &local.param); +} + +static int clone_engines(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + bool user_engines; + unsigned long n; + + clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!clone) + goto err_unlock; + + clone->i915 = dst->i915; + for (n = 0; n < e->num_engines; n++) { + struct intel_engine_cs *engine; + + if (!e->engines[n]) { + clone->engines[n] = NULL; + continue; + } + engine = e->engines[n]->engine; + + /* + * Virtual engines are singletons; they can only exist + * inside a single context, because they embed their + * HW context... As each virtual context implies a single + * timeline (each engine can only dequeue a single request + * at any time), it would be surprising for two contexts + * to use the same engine. 
So let's create a copy of + * the virtual engine instead. + */ + if (intel_engine_is_virtual(engine)) + clone->engines[n] = + intel_execlists_clone_virtual(dst, engine); + else + clone->engines[n] = intel_context_create(dst, engine); + if (IS_ERR_OR_NULL(clone->engines[n])) { + __free_engines(clone, n); + goto err_unlock; + } + } + clone->num_engines = n; + + user_engines = i915_gem_context_user_engines(src); + i915_gem_context_unlock_engines(src); + + free_engines(dst->engines); + RCU_INIT_POINTER(dst->engines, clone); + if (user_engines) + i915_gem_context_set_user_engines(dst); + else + i915_gem_context_clear_user_engines(dst); + return 0; + +err_unlock: + i915_gem_context_unlock_engines(src); + return -ENOMEM; +} + +static int clone_flags(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->user_flags = src->user_flags; + return 0; +} + +static int clone_schedattr(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->sched = src->sched; + return 0; +} + +static int clone_sseu(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + unsigned long n; + int err; + + clone = dst->engines; /* no locking required; sole access */ + if (e->num_engines != clone->num_engines) { + err = -EINVAL; + goto unlock; + } + + for (n = 0; n < e->num_engines; n++) { + struct intel_context *ce = e->engines[n]; + + if (clone->engines[n]->engine->class != ce->engine->class) { + /* Must have compatible engine maps! */ + err = -EINVAL; + goto unlock; + } + + /* serialises with set_sseu */ + err = intel_context_lock_pinned(ce); + if (err) + goto unlock; + + clone->engines[n]->sseu = ce->sseu; + intel_context_unlock_pinned(ce); + } + + err = 0; +unlock: + i915_gem_context_unlock_engines(src); + return err; +} + +static int clone_timeline(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + if (src->timeline) { + GEM_BUG_ON(src->timeline == dst->timeline); + + if (dst->timeline) + i915_timeline_put(dst->timeline); + dst->timeline = i915_timeline_get(src->timeline); + } + + return 0; +} + +static int clone_vm(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_hw_ppgtt *ppgtt; + + rcu_read_lock(); + do { + ppgtt = READ_ONCE(src->ppgtt); + if (!ppgtt) + break; + + if (!kref_get_unless_zero(&ppgtt->ref)) + continue; + + /* + * This ppgtt may have be reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of src, + * we no longer care if it is released from src, as + * it cannot be reallocated elsewhere. 
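
The same acquire-and-recheck dance in isolation, with a hypothetical `struct foo` and `foo_release()` standing in for the ppgtt (sketch only, mirroring the loop below):

	#include <linux/kref.h>
	#include <linux/rcupdate.h>

	struct foo {
		struct kref ref;
	};

	static void foo_release(struct kref *ref);

	static struct foo *foo_get_live(struct foo __rcu **slot)
	{
		struct foo *f;

		rcu_read_lock();
		do {
			f = rcu_dereference(*slot);
			if (!f)
				break;

			if (!kref_get_unless_zero(&f->ref))
				continue; /* dying; reread the slot */

			if (f == rcu_access_pointer(*slot))
				break; /* still published; our ref is valid */

			/* freed and reused elsewhere between the read and
			 * the kref; drop it and retry */
			kref_put(&f->ref, foo_release);
		} while (1);
		rcu_read_unlock();

		return f;
	}
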
+ */ + + if (ppgtt == READ_ONCE(src->ppgtt)) + break; + + i915_ppgtt_put(ppgtt); + } while (1); + rcu_read_unlock(); + + if (ppgtt) { + __assign_ppgtt(dst, ppgtt); + i915_ppgtt_put(ppgtt); + } + + return 0; +} + +static int create_clone(struct i915_user_extension __user *ext, void *data) +{ + static int (* const fn[])(struct i915_gem_context *dst, + struct i915_gem_context *src) = { +#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y + MAP(ENGINES, clone_engines), + MAP(FLAGS, clone_flags), + MAP(SCHEDATTR, clone_schedattr), + MAP(SSEU, clone_sseu), + MAP(TIMELINE, clone_timeline), + MAP(VM, clone_vm), +#undef MAP + }; + struct drm_i915_gem_context_create_ext_clone local; + const struct create_ext *arg = data; + struct i915_gem_context *dst = arg->ctx; + struct i915_gem_context *src; + int err, bit; + + if (copy_from_user(&local, ext, sizeof(local))) + return -EFAULT; + + BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != + I915_CONTEXT_CLONE_UNKNOWN); + + if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) + return -EINVAL; + + if (local.rsvd) + return -EINVAL; + + rcu_read_lock(); + src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); + rcu_read_unlock(); + if (!src) + return -ENOENT; + + GEM_BUG_ON(src == dst); + + for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { + if (!(local.flags & BIT(bit))) + continue; + + err = fn[bit](dst, src); + if (err) + return err; + } + + return 0; +} + +static const i915_user_extension_fn create_extensions[] = { + [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, +}; + +static bool client_is_banned(struct drm_i915_file_private *file_priv) +{ + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; +} + +int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_private *i915 = to_i915(dev); + struct drm_i915_gem_context_create_ext *args = data; + struct create_ext ext_data; + int ret; + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return -ENODEV; + + if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) + return -EINVAL; + + ret = i915_terminally_wedged(i915); + if (ret) + return ret; + + ext_data.fpriv = file->driver_priv; + if (client_is_banned(ext_data.fpriv)) { + DRM_DEBUG("client %s[%d] banned from creating ctx\n", + current->comm, + pid_nr(get_task_pid(current, PIDTYPE_PID))); + return -EIO; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + ext_data.ctx = i915_gem_create_context(i915, args->flags); + mutex_unlock(&dev->struct_mutex); + if (IS_ERR(ext_data.ctx)) + return PTR_ERR(ext_data.ctx); + + if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { + ret = i915_user_extensions(u64_to_user_ptr(args->extensions), + create_extensions, + ARRAY_SIZE(create_extensions), + &ext_data); + if (ret) + goto err_ctx; + } + + ret = gem_context_register(ext_data.ctx, ext_data.fpriv); + if (ret < 0) + goto err_ctx; + + args->ctx_id = ret; + DRM_DEBUG("HW context %d created\n", args->ctx_id); + + return 0; + +err_ctx: + mutex_lock(&dev->struct_mutex); + context_close(ext_data.ctx); + mutex_unlock(&dev->struct_mutex); + return ret; +} + +int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_context_destroy *args = data; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct i915_gem_context *ctx; + + if (args->pad != 0) + return -EINVAL; + + if (!args->ctx_id) + return -ENOENT; + + if 
(mutex_lock_interruptible(&file_priv->context_idr_lock)) + return -EINTR; + + ctx = idr_remove(&file_priv->context_idr, args->ctx_id); + mutex_unlock(&file_priv->context_idr_lock); + if (!ctx) + return -ENOENT; + + mutex_lock(&dev->struct_mutex); + context_close(ctx); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + +static int get_sseu(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct drm_i915_gem_context_param_sseu user_sseu; + struct intel_context *ce; + unsigned long lookup; + int err; + + if (args->size == 0) + goto out; + else if (args->size < sizeof(user_sseu)) + return -EINVAL; + + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), + sizeof(user_sseu))) + return -EFAULT; + + if (user_sseu.rsvd) + return -EINVAL; + + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_lock_pinned(ce); /* serialises with set_sseu */ + if (err) { + intel_context_put(ce); + return err; + } + + user_sseu.slice_mask = ce->sseu.slice_mask; + user_sseu.subslice_mask = ce->sseu.subslice_mask; + user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; + user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; + + intel_context_unlock_pinned(ce); + intel_context_put(ce); + + if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, + sizeof(user_sseu))) + return -EFAULT; + +out: + args->size = sizeof(user_sseu); + + return 0; +} + +int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_context_param *args = data; + struct i915_gem_context *ctx; + int ret = 0; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) + return -ENOENT; + + switch (args->param) { + case I915_CONTEXT_PARAM_NO_ZEROMAP: + args->size = 0; + args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); + break; + + case I915_CONTEXT_PARAM_GTT_SIZE: + args->size = 0; + if (ctx->ppgtt) + args->value = ctx->ppgtt->vm.total; + else if (to_i915(dev)->mm.aliasing_ppgtt) + args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; + else + args->value = to_i915(dev)->ggtt.vm.total; + break; + + case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: + args->size = 0; + args->value = i915_gem_context_no_error_capture(ctx); + break; + + case I915_CONTEXT_PARAM_BANNABLE: + args->size = 0; + args->value = i915_gem_context_is_bannable(ctx); + break; + + case I915_CONTEXT_PARAM_RECOVERABLE: + args->size = 0; + args->value = i915_gem_context_is_recoverable(ctx); + break; + + case I915_CONTEXT_PARAM_PRIORITY: + args->size = 0; + args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; + break; + + case I915_CONTEXT_PARAM_SSEU: + ret = get_sseu(ctx, args); + break; + + case I915_CONTEXT_PARAM_VM: + ret = get_ppgtt(file_priv, ctx, args); + break; + + case I915_CONTEXT_PARAM_ENGINES: + ret = get_engines(ctx, args); + break; + + case I915_CONTEXT_PARAM_BAN_PERIOD: + default: + ret = -EINVAL; + break; + } + + i915_gem_context_put(ctx); + return ret; +} + +int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_context_param *args = data; + struct i915_gem_context *ctx; + 
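
Seen from the other end of this ioctl, a minimal caller looks like the following (userspace sketch; `fd` and `ctx_id` are assumptions, and priorities range over I915_CONTEXT_MIN/MAX_USER_PRIORITY):

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_PRIORITY,
		.value = 512, /* above the default: needs CAP_SYS_NICE */
	};

	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
		return -errno;
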
int ret; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) + return -ENOENT; + + ret = ctx_setparam(file_priv, ctx, args); + + i915_gem_context_put(ctx); + return ret; +} + +int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_reset_stats *args = data; + struct i915_gem_context *ctx; + int ret; + + if (args->flags || args->pad) + return -EINVAL; + + ret = -ENOENT; + rcu_read_lock(); + ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); + if (!ctx) + goto out; + + /* + * We opt for unserialised reads here. This may result in tearing + * in the extremely unlikely event of a GPU hang on this context + * as we are querying them. If we need that extra layer of protection, + * we should wrap the hangstats with a seqlock. + */ + + if (capable(CAP_SYS_ADMIN)) + args->reset_count = i915_reset_count(&dev_priv->gpu_error); + else + args->reset_count = 0; + + args->batch_active = atomic_read(&ctx->guilty_count); + args->batch_pending = atomic_read(&ctx->active_count); + + ret = 0; +out: + rcu_read_unlock(); + return ret; +} + +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) +{ + struct drm_i915_private *i915 = ctx->i915; + int err = 0; + + mutex_lock(&i915->contexts.mutex); + + GEM_BUG_ON(i915_gem_context_is_closed(ctx)); + + if (list_empty(&ctx->hw_id_link)) { + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); + + err = assign_hw_id(i915, &ctx->hw_id); + if (err) + goto out_unlock; + + list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); + } + + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); + atomic_inc(&ctx->hw_id_pin_count); + +out_unlock: + mutex_unlock(&i915->contexts.mutex); + return err; +} + +/* GEM context-engines iterator: for_each_gem_engine() */ +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) +{ + const struct i915_gem_engines *e = it->engines; + struct intel_context *ctx; + + do { + if (it->idx >= e->num_engines) + return NULL; + + ctx = e->engines[it->idx++]; + } while (!ctx); + + return ctx; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" +#endif + +static void i915_global_gem_context_shrink(void) +{ + kmem_cache_shrink(global.slab_luts); +} + +static void i915_global_gem_context_exit(void) +{ + kmem_cache_destroy(global.slab_luts); +} + +static struct i915_global_gem_context global = { { + .shrink = i915_global_gem_context_shrink, + .exit = i915_global_gem_context_exit, +} }; + +int __init i915_global_gem_context_init(void) +{ + global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); + if (!global.slab_luts) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h new file mode 100644 index 000000000000..630392c77e48 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -0,0 +1,240 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_GEM_CONTEXT_H__ +#define __I915_GEM_CONTEXT_H__ + +#include "i915_gem_context_types.h" + +#include "gt/intel_context.h" + +#include "i915_gem.h" +#include "i915_scheduler.h" +#include "intel_device_info.h" + +struct drm_device; +struct drm_file; + +static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_CLOSED, &ctx->flags); 
+} + +static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(i915_gem_context_is_closed(ctx)); + set_bit(CONTEXT_CLOSED, &ctx->flags); +} + +static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx) +{ + return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx) +{ + set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx) +{ + clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); +} + +static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_BANNED, &ctx->flags); +} + +static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_BANNED, &ctx->flags); +} + +static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); +} + +static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) +{ + __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); +} + +static inline bool +i915_gem_context_user_engines(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_set_user_engines(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); +static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) +{ + if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) + return 0; + + return __i915_gem_context_pin_hw_id(ctx); +} + +static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); + atomic_dec(&ctx->hw_id_pin_count); +} + +static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) +{ + return !ctx->file_priv; +} + +/* i915_gem_context.c */ +int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); +void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); +void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); + +int i915_gem_context_open(struct drm_i915_private *i915, + struct drm_file *file); +void i915_gem_context_close(struct drm_file *file); + +void i915_gem_context_release(struct kref *ctx_ref); +struct i915_gem_context * 
+i915_gem_context_create_gvt(struct drm_device *dev);
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file);
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file);
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
+				       struct drm_file *file);
+
+struct i915_gem_context *
+i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
+
+static inline struct i915_gem_context *
+i915_gem_context_get(struct i915_gem_context *ctx)
+{
+	kref_get(&ctx->ref);
+	return ctx;
+}
+
+static inline void i915_gem_context_put(struct i915_gem_context *ctx)
+{
+	kref_put(&ctx->ref, i915_gem_context_release);
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines,
+					 lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+	__acquires(&ctx->engines_mutex)
+{
+	mutex_lock(&ctx->engines_mutex);
+	return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+	__releases(&ctx->engines_mutex)
+{
+	mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	return i915_gem_context_engines(ctx)->engines[idx];
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	struct intel_context *ce = ERR_PTR(-EINVAL);
+
+	rcu_read_lock(); {
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))
+			ce = intel_context_get(e->engines[idx]);
+	} rcu_read_unlock();
+
+	return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+			   struct i915_gem_engines *engines)
+{
+	GEM_BUG_ON(!engines);
+	it->engines = engines;
+	it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)
+
+struct i915_lut_handle *i915_lut_handle_alloc(void);
+void i915_lut_handle_free(struct i915_lut_handle *lut);
+
+#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
new file mode 100644
index 000000000000..fb965ded2508
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -0,0 +1,208 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_TYPES_H__
+#define __I915_GEM_CONTEXT_TYPES_H__
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/llist.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+#include "gt/intel_context_types.h"
+
+#include "i915_scheduler.h"
+
+struct pid;
+
+struct drm_i915_private;
+struct drm_i915_file_private;
+struct i915_hw_ppgtt;
+struct i915_timeline;
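
A typical walk over the map with the iterator defined above looks like this (sketch only; the engine map must stay locked across the walk):

	struct i915_gem_engines_iter it;
	struct intel_context *ce;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		/* ce visits every populated slot; NULL holes are skipped */
	}
	i915_gem_context_unlock_engines(ctx);
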
+struct intel_ring;
+
+struct i915_gem_engines {
+	struct rcu_work rcu;
+	struct drm_i915_private *i915;
+	unsigned int num_engines;
+	struct intel_context *engines[];
+};
+
+struct i915_gem_engines_iter {
+	unsigned int idx;
+	const struct i915_gem_engines *engines;
+};
+
+/**
+ * struct i915_gem_context - client state
+ *
+ * The struct i915_gem_context represents the combined view of the driver and
+ * logical hardware state for a particular client.
+ */
+struct i915_gem_context {
+	/** i915: i915 device backpointer */
+	struct drm_i915_private *i915;
+
+	/** file_priv: owning file descriptor */
+	struct drm_i915_file_private *file_priv;
+
+	/**
+	 * @engines: User defined engines for this context
+	 *
+	 * Various uAPI offer the ability to look up an index from this
+	 * array to select an engine to operate on.
+	 *
+	 * Multiple logically distinct instances of the same engine
+	 * may be defined in the array, as well as composite virtual
+	 * engines.
+	 *
+	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+	 * array to select which HW context + engine to execute on. For
+	 * the default array, the user_ring_map[] is used to translate
+	 * the legacy uABI onto the appropriate index (e.g. both
+	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+	 * context, and I915_EXEC_BSD is weird). For a user defined
+	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+	 *
+	 * User defined by I915_CONTEXT_PARAM_ENGINES (when the
+	 * CONTEXT_USER_ENGINES flag is set).
+	 */
+	struct i915_gem_engines __rcu *engines;
+	struct mutex engines_mutex; /* guards writes to engines */
+
+	struct i915_timeline *timeline;
+
+	/**
+	 * @ppgtt: unique address space (GTT)
+	 *
+	 * In full-ppgtt mode, each context has its own address space ensuring
+	 * complete separation of one client from all others.
+	 *
+	 * In other modes, this is a NULL pointer with the expectation that
+	 * the caller uses the shared global GTT.
+	 */
+	struct i915_hw_ppgtt *ppgtt;
+
+	/**
+	 * @pid: process id of creator
+	 *
+	 * Note that who created the context may not be the principal user,
+	 * as the context may be shared across a local socket. However,
+	 * that should only affect the default context; all contexts created
+	 * explicitly by the client are expected to be isolated.
+	 */
+	struct pid *pid;
+
+	/**
+	 * @name: arbitrary name
+	 *
+	 * A name is constructed for the context from the creator's process
+	 * name, pid and user handle in order to uniquely identify the
+	 * context in messages.
+	 */
+	const char *name;
+
+	/** link: place within &drm_i915_private.context_list */
+	struct list_head link;
+	struct llist_node free_link;
+
+	/**
+	 * @ref: reference count
+	 *
+	 * A reference to a context is held by both the client who created it
+	 * and by each request submitted to the hardware using the request
+	 * (to ensure the hardware has access to the state until it has
+	 * finished all pending writes). See i915_gem_context_get() and
+	 * i915_gem_context_put() for access.
+	 */
+	struct kref ref;
+
+	/**
+	 * @rcu: rcu_head for deferred freeing.
+	 */
+	struct rcu_head rcu;
+
+	/**
+	 * @user_flags: small set of booleans controlled by the user
+	 */
+	unsigned long user_flags;
+#define UCONTEXT_NO_ZEROMAP		0
+#define UCONTEXT_NO_ERROR_CAPTURE	1
+#define UCONTEXT_BANNABLE		2
+#define UCONTEXT_RECOVERABLE		3
+
+	/**
+	 * @flags: small set of booleans
+	 */
+	unsigned long flags;
+#define CONTEXT_BANNED			0
+#define CONTEXT_CLOSED			1
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USER_ENGINES		3
+
+	/**
+	 * @hw_id: - unique identifier for the context
+	 *
+	 * The hardware needs to uniquely identify the context for a few
+	 * functions like fault reporting, PASID, scheduling. The
+	 * &drm_i915_private.context_hw_ida is used to assign a unique
+	 * id for the lifetime of the context.
+	 *
+	 * @hw_id_pin_count: - number of times this context has been pinned
+	 * for use (should be, at most, once per engine).
+	 *
+	 * @hw_id_link: - all contexts with an assigned id are tracked
+	 * for possible repossession.
+	 */
+	unsigned int hw_id;
+	atomic_t hw_id_pin_count;
+	struct list_head hw_id_link;
+
+	struct mutex mutex;
+
+	struct i915_sched_attr sched;
+
+	/** ring_size: size for allocating the per-engine ring buffer */
+	u32 ring_size;
+	/** desc_template: invariant fields for the HW context descriptor */
+	u32 desc_template;
+
+	/** guilty_count: How many times this context has caused a GPU hang. */
+	atomic_t guilty_count;
+	/**
+	 * @active_count: How many times this context was active during a GPU
+	 * hang, but did not cause it.
+	 */
+	atomic_t active_count;
+
+	/**
+	 * @hang_timestamp: The last time(s) this context caused a GPU hang
+	 */
+	unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
+
+	/** remap_slice: Bitmask of cache lines that need remapping */
+	u8 remap_slice;
+
+	/** handles_vma: rbtree to look up our context specific obj/vma for
+	 * the user handle. (user handles are per fd, but the binding is
+	 * per vm, which may be one per context or shared with the global GTT)
+	 */
+	struct radix_tree_root handles_vma;
+
+	/** handles_list: reverse list of all the rbtree entries in use for
+	 * this context, which allows us to free all the allocations on
+	 * context close.
+	 */
+	struct list_head handles_list;
+};
+
+#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
new file mode 100644
index 000000000000..600fc926f81e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -0,0 +1,318 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright 2012 Red Hat Inc
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/reservation.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
+{
+	return to_intel_bo(buf->priv);
+}
+
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
+					     enum dma_data_direction dir)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
+	struct sg_table *st;
+	struct scatterlist *src, *dst;
+	int ret, i;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	/* Copy sg so that we make an independent mapping */
+	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto err_unpin_pages;
+	}
+
+	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
+	if (ret)
+		goto err_free;
+
+	src = obj->mm.pages->sgl;
+	dst = st->sgl;
+	for (i = 0; i < obj->mm.pages->nents; i++) {
+		sg_set_page(dst, sg_page(src), src->length, 0);
+		dst = sg_next(dst);
+		src = sg_next(src);
+	}
+
+	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
+		ret = -ENOMEM;
+		goto err_free_sg;
+	}
+
+	return st;
+
+err_free_sg:
+	sg_free_table(st);
+err_free:
+	kfree(st);
+err_unpin_pages:
+	i915_gem_object_unpin_pages(obj);
+err:
+	return ERR_PTR(ret);
+}
+
+static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
+				   struct sg_table *sg,
+				   enum dma_data_direction dir)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
+	sg_free_table(sg);
+	kfree(sg);
+
+	i915_gem_object_unpin_pages(obj);
+}
+
+static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	return i915_gem_object_pin_map(obj, I915_MAP_WB);
+}
+
+static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+}
+
+static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	struct page *page;
+
+	if (page_num >= obj->base.size >> PAGE_SHIFT)
+		return NULL;
+
+	if (!i915_gem_object_has_struct_page(obj))
+		return NULL;
+
+	if (i915_gem_object_pin_pages(obj))
+		return NULL;
+
+	/* Synchronisation is left to the caller (via .begin_cpu_access()) */
+	page = i915_gem_object_get_page(obj, page_num);
+	if (IS_ERR(page))
+		goto err_unpin;
+
+	return kmap(page);
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return NULL;
+}
+
+static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	kunmap(virt_to_page(addr));
+	i915_gem_object_unpin_pages(obj);
+}
+
+static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	int ret;
+
+	if (obj->base.size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (!obj->base.filp)
+		return -ENODEV;
+
+	ret = call_mmap(obj->base.filp, vma);
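+	/*
+	 * On success the vma still references the dma-buf's file; the
+	 * swap below re-points vm_file at the object's backing shmem
+	 * file so the mapping pins that file instead.
+	 */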
if (ret) + return ret; + + fput(vma->vm_file); + vma->vm_file = get_file(obj->base.filp); + + return 0; +} + +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct drm_device *dev = obj->base.dev; + bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); + int err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; + + err = i915_gem_object_set_to_cpu_domain(obj, write); + mutex_unlock(&dev->struct_mutex); + +out: + i915_gem_object_unpin_pages(obj); + return err; +} + +static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) +{ + struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); + struct drm_device *dev = obj->base.dev; + int err; + + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + mutex_unlock(&dev->struct_mutex); + +out: + i915_gem_object_unpin_pages(obj); + return err; +} + +static const struct dma_buf_ops i915_dmabuf_ops = { + .map_dma_buf = i915_gem_map_dma_buf, + .unmap_dma_buf = i915_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .map = i915_gem_dmabuf_kmap, + .unmap = i915_gem_dmabuf_kunmap, + .mmap = i915_gem_dmabuf_mmap, + .vmap = i915_gem_dmabuf_vmap, + .vunmap = i915_gem_dmabuf_vunmap, + .begin_cpu_access = i915_gem_begin_cpu_access, + .end_cpu_access = i915_gem_end_cpu_access, +}; + +struct dma_buf *i915_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gem_obj, int flags) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + exp_info.ops = &i915_dmabuf_ops; + exp_info.size = gem_obj->size; + exp_info.flags = flags; + exp_info.priv = gem_obj; + exp_info.resv = obj->resv; + + if (obj->ops->dmabuf_export) { + int ret = obj->ops->dmabuf_export(obj); + if (ret) + return ERR_PTR(ret); + } + + return drm_gem_dmabuf_export(dev, &exp_info); +} + +static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +{ + struct sg_table *pages; + unsigned int sg_page_sizes; + + pages = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + sg_page_sizes = i915_sg_page_sizes(pages->sgl); + + __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + + return 0; +} + +static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + dma_buf_unmap_attachment(obj->base.import_attach, pages, + DMA_BIDIRECTIONAL); +} + +static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { + .get_pages = i915_gem_object_get_pages_dmabuf, + .put_pages = i915_gem_object_put_pages_dmabuf, +}; + +struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct dma_buf_attachment *attach; + struct drm_i915_gem_object *obj; + int ret; + + /* is this one of own objects? */ + if (dma_buf->ops == &i915_dmabuf_ops) { + obj = dma_buf_to_obj(dma_buf); + /* is it from our device? */ + if (obj->base.dev == dev) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. 
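
For reference, the round trip this check short-circuits looks like the following from userspace (a sketch using the generic PRIME ioctls; `fd` and `bo_handle` are assumptions, error handling elided):

	struct drm_prime_handle export = {
		.handle = bo_handle,
		.flags = DRM_CLOEXEC,
	};
	struct drm_prime_handle import = {};

	/* export: GEM handle -> dma-buf fd */
	ioctl(fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &export);

	/* re-import on the same device: recognised as ours, so this only
	 * takes a reference on the original object */
	import.fd = export.fd;
	ioctl(fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import);
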
+			 */
+			return &i915_gem_object_get(obj)->base;
+		}
+	}
+
+	/* need to attach */
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach))
+		return ERR_CAST(attach);
+
+	get_dma_buf(dma_buf);
+
+	obj = i915_gem_object_alloc();
+	if (obj == NULL) {
+		ret = -ENOMEM;
+		goto fail_detach;
+	}
+
+	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
+	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
+	obj->base.import_attach = attach;
+	obj->resv = dma_buf->resv;
+
+	/* We use GTT as shorthand for a coherent domain, one that is
+	 * neither in the GPU cache nor in the CPU cache, where all
+	 * writes are immediately visible in memory. (That's not strictly
+	 * true, but it's close! There are internal buffers such as the
+	 * write-combined buffer or a delay through the chipset for GTT
+	 * writes that do require us to treat GTT as a separate cache domain.)
+	 */
+	obj->read_domains = I915_GEM_DOMAIN_GTT;
+	obj->write_domain = 0;
+
+	return &obj->base;
+
+fail_detach:
+	dma_buf_detach(dma_buf, attach);
+	dma_buf_put(dma_buf);
+
+	return ERR_PTR(ret);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_dmabuf.c"
+#include "selftests/i915_gem_dmabuf.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
new file mode 100644
index 000000000000..09e64bf33842
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -0,0 +1,2768 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008,2010 Intel Corporation
+ */
+
+#include <linux/intel-iommu.h>
+#include <linux/reservation.h>
+#include <linux/sync_file.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_syncobj.h>
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_ioctls.h"
+#include "gt/intel_context.h"
+#include "gt/intel_gt_pm.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include "intel_frontbuffer.h"
+
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
+#define __EXEC_OBJECT_HAS_REF		BIT(31)
+#define __EXEC_OBJECT_HAS_PIN		BIT(30)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+
+#define __EXEC_HAS_RELOC	BIT(31)
+#define __EXEC_VALIDATED	BIT(30)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
+#define UPDATE			PIN_OFFSET_FIXED
+
+#define BATCH_OFFSET_BIAS (256*1024)
+
+#define __I915_EXEC_ILLEGAL_FLAGS \
+	(__I915_EXEC_UNKNOWN_FLAGS | \
+	 I915_EXEC_CONSTANTS_MASK | \
+	 I915_EXEC_RESOURCE_STREAMER)
+
+/* Catch emission of unexpected errors for CI! */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#undef EINVAL
+#define EINVAL ({ \
+	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
+	22; \
+})
+#endif
+
+/**
+ * DOC: User command execution
+ *
+ * Userspace submits commands to be executed on the GPU as an instruction
+ * stream within a GEM object we call a batchbuffer. These instructions may
+ * refer to other GEM objects containing auxiliary state such as kernels,
+ * samplers, render targets and even secondary batchbuffers. Userspace does
+ * not know where in the GPU memory these objects reside and so before the
+ * batchbuffer is passed to the GPU for execution, those addresses in the
+ * batchbuffer and auxiliary objects are updated. This is known as relocation,
+ * or patching.
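
Concretely, a single patch point is described to the kernel as below (userspace sketch; `target` and `last_seen_offset` are assumed names, the struct comes from the execbuffer uAPI):

	struct drm_i915_gem_relocation_entry reloc = {
		.target_handle = target,	/* object the pointer refers to */
		.delta = 0,			/* byte offset inside the target */
		.offset = 128,			/* where in this object to patch */
		.presumed_offset = last_seen_offset,
		.read_domains = I915_GEM_DOMAIN_RENDER,
		.write_domain = 0,
	};
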
+ * To try and avoid having to relocate each object on the next
+ * execution, userspace is told the location of those objects in this pass,
+ * but this remains just a hint as the kernel may choose a new location for
+ * any object in the future.
+ *
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute is to add content to a buffer from which the HW
+ * command streamer is reading.
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ *    Execlists, this command is not placed on the same buffer as the
+ *    remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ *    essentially a token together with the GPU address of the batchbuffer
+ *    to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ *    is done executing the batchbuffer. The memory write writes the
+ *    global sequence number of the request, ``i915_request::global_seqno``;
+ *    the i915 driver uses the current value in the register to determine
+ *    if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ *    the GPU to issue an interrupt when the command, pipeline flush and
+ *    memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ *    (by updating the tail pointer).
+ *
+ * Processing an execbuf ioctl is conceptually split up into a few phases.
+ *
+ * 1. Validation - Ensure all the pointers, handles and flags are valid.
+ * 2. Reservation - Assign GPU address space for every object
+ * 3. Relocation - Update any addresses to point to the final locations
+ * 4. Serialisation - Order the request with respect to its dependencies
+ * 5. Construction - Construct a request to execute the batchbuffer
+ * 6. Submission (at some point in the future execution)
+ *
+ * Reserving resources for the execbuf is the most complicated phase. We
+ * neither want to have to migrate the object in the address space, nor do
+ * we want to have to update any relocations pointing to this object. Ideally,
+ * we want to leave the object where it is and for all the existing relocations
+ * to match. If the object is given a new address, or if userspace thinks the
+ * object is elsewhere, we have to parse all the relocation entries and update
+ * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
+ * all the target addresses in all of its objects match the value in the
+ * relocation entries and that they all match the presumed offsets given by the
+ * list of execbuffer objects. Using this knowledge, we know that if we haven't
+ * moved any buffers, all the relocation entries are valid and we can skip
+ * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
+ * hang.) The requirements for using I915_EXEC_NO_RELOC are:
+ *
+ *      The addresses written in the objects must match the corresponding
+ *      reloc.presumed_offset which in turn must match the corresponding
+ *      execobject.offset.
+ *
+ *      Any render targets written to in the batch must be flagged with
+ *      EXEC_OBJECT_WRITE.
+ *
+ *      To avoid stalling, execobject.offset should match the current
+ *      address of that object within the active context.
+ *
+ * The reservation is done in multiple phases.
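
A minimal submission honouring those rules might look like the following (userspace sketch with two objects, batch last; `fd`, `ctx_id`, the handles and the presumed offsets are assumptions):

	struct drm_i915_gem_exec_object2 obj[2] = {
		{ .handle = target, .offset = presumed_target,
		  .flags = EXEC_OBJECT_WRITE },
		{ .handle = batch, .offset = presumed_batch },
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)obj,
		.buffer_count = 2,
		.batch_len = 4096,
		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
		.rsvd1 = ctx_id,
	};

	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
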
+ * First we try and keep any
+ * object already bound in its current location - so as long as it meets the
+ * constraints imposed by the new execbuffer. Any object left unbound after the
+ * first pass is then fitted into any available idle space. If an object does
+ * not fit, all objects are removed from the reservation and the process rerun
+ * after sorting the objects into a priority order (more difficult to fit
+ * objects are tried first). Failing that, the entire VM is cleared and we try
+ * to fit the execbuf one last time before concluding that it simply will not
+ * fit.
+ *
+ * A small complication to all of this is that we allow userspace not only to
+ * specify an alignment and a size for the object in the address space, but
+ * we also allow userspace to specify the exact offset. Such objects are
+ * simpler to place (the location is known a priori): all we have to do is make
+ * sure the space is available.
+ *
+ * Once all the objects are in place, patching up the buried pointers to point
+ * to the final locations is a fairly simple job of walking over the relocation
+ * entry arrays, looking up the right address and rewriting the value into
+ * the object. Simple! ... The relocation entries are stored in user memory
+ * and so to access them we have to copy them into a local buffer. That copy
+ * has to avoid taking any pagefaults as they may lead back to a GEM object
+ * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * the relocation into multiple passes. First we try to do everything within an
+ * atomic context (avoid the pagefaults) which requires that we never wait. If
+ * we detect that we may wait, or if we need to fault, then we have to fall back
+ * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
+ * bells yet?) Dropping the mutex means that we lose all the state we have
+ * built up so far for the execbuf and we must reset any global data. However,
+ * we do leave the objects pinned in their final locations - which is a
+ * potential issue for concurrent execbufs. Once we have left the mutex, we can
+ * allocate and copy all the relocation entries into a large array at our
+ * leisure, reacquire the mutex, reclaim all the objects and other state and
+ * then proceed to update any incorrect addresses with the objects.
+ *
+ * As we process the relocation entries, we maintain a record of whether the
+ * object is being written to. Using NO_RELOC, we expect userspace to provide
+ * this information instead. We also check whether we can skip the relocation
+ * by comparing the expected value inside the relocation entry with the target's
+ * final address. If they differ, we have to map the current object and rewrite
+ * the 4 or 8 byte pointer within.
+ *
+ * Serialising an execbuf is quite simple according to the rules of the GEM
+ * ABI. Execution within each context is ordered by the order of submission.
+ * Writes to any GEM object are in order of submission and are exclusive. Reads
+ * from a GEM object are unordered with respect to other reads, but ordered by
+ * writes. A write submitted after a read cannot occur before the read, and
+ * similarly any read submitted after a write cannot occur before the write.
+ * Writes are ordered between engines such that only one write occurs at any
+ * time (completing any reads beforehand) - using semaphores where available
+ * and CPU serialisation otherwise.
+ * Other GEM accesses obey the same rules: any
+ * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
+ * reads before starting, and any read (either using set-domain or pread) must
+ * flush all GPU writes before starting. (Note we only employ a barrier before,
+ * we currently rely on userspace not concurrently starting a new execution
+ * whilst reading or writing to an object. This may be an advantage or not
+ * depending on how much you trust userspace not to shoot themselves in the
+ * foot.) Serialisation may just result in the request being inserted into
+ * a DAG awaiting its turn, but the simplest is to wait on the CPU until
+ * all dependencies are resolved.
+ *
+ * After all of that, it is just a matter of closing the request and handing it
+ * to the hardware (well, leaving it in a queue to be executed). However, we
+ * also offer the ability for batchbuffers to be run with elevated privileges
+ * so that they access otherwise hidden registers. (Used to adjust L3 cache
+ * etc.) Before any batch is given extra privileges we first must check that it
+ * contains no nefarious instructions: we check that each instruction is from
+ * our whitelist and all registers are also from an allowed list. We first
+ * copy the user's batchbuffer to a shadow (so that the user doesn't have
+ * access to it, either by the CPU or GPU as we scan it) and then parse each
+ * instruction. If everything is ok, we set a flag telling the hardware to run
+ * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
+ */
+
+struct i915_execbuffer {
+	struct drm_i915_private *i915; /** i915 backpointer */
+	struct drm_file *file; /** per-file lookup tables and limits */
+	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
+	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
+
+	struct intel_engine_cs *engine; /** engine to queue the request to */
+	struct intel_context *context; /* logical state for the request */
+	struct i915_gem_context *gem_context; /** caller's context */
+	struct i915_address_space *vm; /** GTT and vma for the request */
+
+	struct i915_request *request; /** our request to build */
+	struct i915_vma *batch; /** identity of the batch obj/vma */
+
+	/** actual size of execobj[] as we may extend it for the cmdparser */
+	unsigned int buffer_count;
+
+	/** list of vma not yet bound during reservation phase */
+	struct list_head unbound;
+
+	/** list of vma that have execobj.relocation_count */
+	struct list_head relocs;
+
+	/**
+	 * Track the most recently used object for relocations, as we
+	 * frequently have to perform multiple relocations within the same
+	 * obj/page
+	 */
+	struct reloc_cache {
+		struct drm_mm_node node; /** temporary GTT binding */
+		unsigned long vaddr; /** Current kmap address */
+		unsigned long page; /** Currently mapped page index */
+		unsigned int gen; /** Cached value of INTEL_GEN */
+		bool use_64bit_reloc : 1;
+		bool has_llc : 1;
+		bool has_fence : 1;
+		bool needs_unfenced : 1;
+
+		struct i915_request *rq;
+		u32 *rq_cmd;
+		unsigned int rq_size;
+	} reloc_cache;
+
+	u64 invalid_flags; /** Set of execobj.flags that are invalid */
+	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
+
+	u32 batch_start_offset; /** Location within object of batch */
+	u32 batch_len; /** Length of batch within object */
+	u32 batch_flags; /** Flags composed for emit_bb_start() */
+
+	/**
+	 * Indicate either the size of the hashtable used to resolve
+	 * relocation
handles, or if negative that we are using a direct + * index into the execobj[]. + */ + int lut_size; + struct hlist_head *buckets; /** ht for relocation handles */ +}; + +#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) + +/* + * Used to convert any address to canonical form. + * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, + * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the + * addresses to be in a canonical form: + * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct + * canonical form [63:48] == [47]." + */ +#define GEN8_HIGH_ADDRESS_BIT 47 +static inline u64 gen8_canonical_addr(u64 address) +{ + return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); +} + +static inline u64 gen8_noncanonical_addr(u64 address) +{ + return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); +} + +static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +{ + return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; +} + +static int eb_create(struct i915_execbuffer *eb) +{ + if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { + unsigned int size = 1 + ilog2(eb->buffer_count); + + /* + * Without a 1:1 association between relocation handles and + * the execobject[] index, we instead create a hashtable. + * We size it dynamically based on available memory, starting + * first with 1:1 assocative hash and scaling back until + * the allocation succeeds. + * + * Later on we use a positive lut_size to indicate we are + * using this hashtable, and a negative value to indicate a + * direct lookup. + */ + do { + gfp_t flags; + + /* While we can still reduce the allocation size, don't + * raise a warning and allow the allocation to fail. + * On the last pass though, we want to try as hard + * as possible to perform the allocation and warn + * if it fails. 
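+			 *
+			 * For example, buffer_count == 100 gives
+			 * size = 1 + ilog2(100) = 7, i.e. 128 buckets on the
+			 * first attempt, retrying with 64, 32, ... buckets
+			 * if the allocation fails.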
+ */ + flags = GFP_KERNEL; + if (size > 1) + flags |= __GFP_NORETRY | __GFP_NOWARN; + + eb->buckets = kzalloc(sizeof(struct hlist_head) << size, + flags); + if (eb->buckets) + break; + } while (--size); + + if (unlikely(!size)) + return -ENOMEM; + + eb->lut_size = size; + } else { + eb->lut_size = -eb->buffer_count; + } + + return 0; +} + +static bool +eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, + const struct i915_vma *vma, + unsigned int flags) +{ + if (vma->node.size < entry->pad_to_size) + return true; + + if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) + return true; + + if (flags & EXEC_OBJECT_PINNED && + vma->node.start != entry->offset) + return true; + + if (flags & __EXEC_OBJECT_NEEDS_BIAS && + vma->node.start < BATCH_OFFSET_BIAS) + return true; + + if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && + (vma->node.start + vma->node.size - 1) >> 32) + return true; + + if (flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) + return true; + + return false; +} + +static inline bool +eb_pin_vma(struct i915_execbuffer *eb, + const struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + unsigned int exec_flags = *vma->exec_flags; + u64 pin_flags; + + if (vma->node.size) + pin_flags = vma->node.start; + else + pin_flags = entry->offset & PIN_OFFSET_MASK; + + pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; + if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT)) + pin_flags |= PIN_GLOBAL; + + if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) + return false; + + if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { + if (unlikely(i915_vma_pin_fence(vma))) { + i915_vma_unpin(vma); + return false; + } + + if (vma->fence) + exec_flags |= __EXEC_OBJECT_HAS_FENCE; + } + + *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; + return !eb_vma_misplaced(entry, vma, exec_flags); +} + +static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) +{ + GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); + + if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) + __i915_vma_unpin_fence(vma); + + __i915_vma_unpin(vma); +} + +static inline void +eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags) +{ + if (!(*flags & __EXEC_OBJECT_HAS_PIN)) + return; + + __eb_unreserve_vma(vma, *flags); + *flags &= ~__EXEC_OBJECT_RESERVED; +} + +static int +eb_validate_vma(struct i915_execbuffer *eb, + struct drm_i915_gem_exec_object2 *entry, + struct i915_vma *vma) +{ + if (unlikely(entry->flags & eb->invalid_flags)) + return -EINVAL; + + if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) + return -EINVAL; + + /* + * Offset can be used as input (EXEC_OBJECT_PINNED), reject + * any non-page-aligned or non-canonical addresses. + */ + if (unlikely(entry->flags & EXEC_OBJECT_PINNED && + entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) + return -EINVAL; + + /* pad_to_size was once a reserved field, so sanitize it */ + if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (unlikely(offset_in_page(entry->pad_to_size))) + return -EINVAL; + } else { + entry->pad_to_size = 0; + } + + if (unlikely(vma->exec_flags)) { + DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", + entry->handle, (int)(entry - eb->exec)); + return -EINVAL; + } + + /* + * From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. 
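+	 *
+	 * For example, a pinned offset with bit 47 set arrives from
+	 * userspace in canonical form as 0xffff800000000000;
+	 * gen8_noncanonical_addr() reduces it to 0x0000800000000000 for
+	 * drm_mm, and gen8_canonical_addr() sign-extends it back when
+	 * offsets are reported to userspace.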
+ */ + entry->offset = gen8_noncanonical_addr(entry->offset); + + if (!eb->reloc_cache.has_fence) { + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; + } else { + if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || + eb->reloc_cache.needs_unfenced) && + i915_gem_object_is_tiled(vma->obj)) + entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; + } + + if (!(entry->flags & EXEC_OBJECT_PINNED)) + entry->flags |= eb->context_flags; + + return 0; +} + +static int +eb_add_vma(struct i915_execbuffer *eb, + unsigned int i, unsigned batch_idx, + struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; + int err; + + GEM_BUG_ON(i915_vma_is_closed(vma)); + + if (!(eb->args->flags & __EXEC_VALIDATED)) { + err = eb_validate_vma(eb, entry, vma); + if (unlikely(err)) + return err; + } + + if (eb->lut_size > 0) { + vma->exec_handle = entry->handle; + hlist_add_head(&vma->exec_node, + &eb->buckets[hash_32(entry->handle, + eb->lut_size)]); + } + + if (entry->relocation_count) + list_add_tail(&vma->reloc_link, &eb->relocs); + + /* + * Stash a pointer from the vma to execobj, so we can query its flags, + * size, alignment etc as provided by the user. Also we stash a pointer + * to the vma inside the execobj so that we can use a direct lookup + * to find the right target VMA when doing relocations. + */ + eb->vma[i] = vma; + eb->flags[i] = entry->flags; + vma->exec_flags = &eb->flags[i]; + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if (i == batch_idx) { + if (entry->relocation_count && + !(eb->flags[i] & EXEC_OBJECT_PINNED)) + eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; + if (eb->reloc_cache.has_fence) + eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; + + eb->batch = vma; + } + + err = 0; + if (eb_pin_vma(eb, entry, vma)) { + if (entry->offset != vma->node.start) { + entry->offset = vma->node.start | UPDATE; + eb->args->flags |= __EXEC_HAS_RELOC; + } + } else { + eb_unreserve_vma(vma, vma->exec_flags); + + list_add_tail(&vma->exec_link, &eb->unbound); + if (drm_mm_node_allocated(&vma->node)) + err = i915_vma_unbind(vma); + if (unlikely(err)) + vma->exec_flags = NULL; + } + return err; +} + +static inline int use_cpu_reloc(const struct reloc_cache *cache, + const struct drm_i915_gem_object *obj) +{ + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) + return true; + + if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) + return false; + + return (cache->has_llc || + obj->cache_dirty || + obj->cache_level != I915_CACHE_NONE); +} + +static int eb_reserve_vma(const struct i915_execbuffer *eb, + struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + unsigned int exec_flags = *vma->exec_flags; + u64 pin_flags; + int err; + + pin_flags = PIN_USER | PIN_NONBLOCK; + if (exec_flags & EXEC_OBJECT_NEEDS_GTT) + pin_flags |= PIN_GLOBAL; + + /* + * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, + * limit address to the first 4GBs for unflagged objects. 
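+ * (In other words, unless userspace set EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+ * PIN_ZONE_4G below keeps node.start + node.size within 1ull << 32,
+ * mirroring the (start + size - 1) >> 32 test in eb_vma_misplaced().)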
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
+
+ if (exec_flags & EXEC_OBJECT_PINNED) {
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+ } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ }
+
+ err = i915_vma_pin(vma,
+ entry->pad_to_size, entry->alignment,
+ pin_flags);
+ if (err)
+ return err;
+
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+
+ if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ err = i915_vma_pin_fence(vma);
+ if (unlikely(err)) {
+ i915_vma_unpin(vma);
+ return err;
+ }
+
+ if (vma->fence)
+ exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+ }
+
+ *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+ GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
+
+ return 0;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ struct list_head last;
+ struct i915_vma *vma;
+ unsigned int i, pass;
+ int err;
+
+ /*
+ * Attempt to pin all of the buffers into the GTT.
+ * This is done in 3 phases:
+ *
+ * 1a. Unbind all objects that do not match the GTT constraints for
+ * the execbuffer (fenceable, mappable, alignment etc).
+ * 1b. Increment pin count for already bound objects.
+ * 2. Bind new objects.
+ * 3. Decrement pin count.
+ *
+ * This avoids unnecessary unbinding of later objects in order to make
+ * room for the earlier objects *unless* we need to defragment.
+ */
+
+ pass = 0;
+ err = 0;
+ do {
+ list_for_each_entry(vma, &eb->unbound, exec_link) {
+ err = eb_reserve_vma(eb, vma);
+ if (err)
+ break;
+ }
+ if (err != -ENOSPC)
+ return err;
+
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+ for (i = 0; i < count; i++) {
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
+
+ if (flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
+ continue;
+
+ eb_unreserve_vma(vma, &eb->flags[i]);
+
+ if (flags & EXEC_OBJECT_PINNED)
+ /* Pinned must have their slot */
+ list_add(&vma->exec_link, &eb->unbound);
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+ /* Mappable objects require the lowest 256MiB (aperture) */
+ list_add_tail(&vma->exec_link, &eb->unbound);
+ else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ /* Prioritise the 4GiB region for restricted bo */
+ list_add(&vma->exec_link, &last);
+ else
+ list_add_tail(&vma->exec_link, &last);
+ }
+ list_splice_tail(&last, &eb->unbound);
+
+ switch (pass++) {
+ case 0:
+ break;
+
+ case 1:
+ /* Too fragmented, unbind everything and retry */
+ err = i915_gem_evict_vm(eb->vm);
+ if (err)
+ return err;
+ break;
+
+ default:
+ return -ENOSPC;
+ }
+ } while (1);
+}
+
+static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
+{
+ if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+ return 0;
+ else
+ return eb->buffer_count - 1;
+}
+
+static int eb_select_context(struct i915_execbuffer *eb)
+{
+ struct i915_gem_context *ctx;
+
+ ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
+ if (unlikely(!ctx))
+ return -ENOENT;
+
+ eb->gem_context = ctx;
+ if (ctx->ppgtt) {
+ eb->vm = &ctx->ppgtt->vm;
+ eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
+ } else {
+ eb->vm = &eb->i915->ggtt.vm;
+ }
+
+ eb->context_flags = 0;
+ if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
+
eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return 0; +} + +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) +{ + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &ring->request_list, ring_link) { + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->ring_link == &ring->request_list) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int eb_wait_for_ring(const struct i915_execbuffer *eb) +{ + struct i915_request *rq; + int ret = 0; + + /* + * Apply a light amount of backpressure to prevent excessive hogs + * from blocking waiting for space whilst holding struct_mutex and + * keeping all of their resources pinned. + */ + + rq = __eb_wait_for_ring(eb->context->ring); + if (rq) { + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + ret = -EINTR; + + i915_request_put(rq); + + mutex_lock(&eb->i915->drm.struct_mutex); + } + + return ret; +} + +static int eb_lookup_vmas(struct i915_execbuffer *eb) +{ + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; + struct drm_i915_gem_object *obj; + unsigned int i, batch; + int err; + + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) + return -ENOENT; + + if (unlikely(i915_gem_context_is_banned(eb->gem_context))) + return -EIO; + + INIT_LIST_HEAD(&eb->relocs); + INIT_LIST_HEAD(&eb->unbound); + + batch = eb_batch_index(eb); + + for (i = 0; i < eb->buffer_count; i++) { + u32 handle = eb->exec[i].handle; + struct i915_lut_handle *lut; + struct i915_vma *vma; + + vma = radix_tree_lookup(handles_vma, handle); + if (likely(vma)) + goto add_vma; + + obj = i915_gem_object_lookup(eb->file, handle); + if (unlikely(!obj)) { + err = -ENOENT; + goto err_vma; + } + + vma = i915_vma_instance(obj, eb->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + lut = i915_lut_handle_alloc(); + if (unlikely(!lut)) { + err = -ENOMEM; + goto err_obj; + } + + err = radix_tree_insert(handles_vma, handle, vma); + if (unlikely(err)) { + i915_lut_handle_free(lut); + goto err_obj; + } + + /* transfer ref to ctx */ + if (!vma->open_count++) + i915_vma_reopen(vma); + list_add(&lut->obj_link, &obj->lut_list); + list_add(&lut->ctx_link, &eb->gem_context->handles_list); + lut->ctx = eb->gem_context; + lut->handle = handle; + +add_vma: + err = eb_add_vma(eb, i, batch, vma); + if (unlikely(err)) + goto err_vma; + + GEM_BUG_ON(vma != eb->vma[i]); + GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); + } + + eb->args->flags |= __EXEC_VALIDATED; + return eb_reserve(eb); + +err_obj: + i915_gem_object_put(obj); +err_vma: + eb->vma[i] = NULL; + return err; +} + +static struct i915_vma * +eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) +{ + if (eb->lut_size < 0) { + if (handle 
>= -eb->lut_size) + return NULL; + return eb->vma[handle]; + } else { + struct hlist_head *head; + struct i915_vma *vma; + + head = &eb->buckets[hash_32(handle, eb->lut_size)]; + hlist_for_each_entry(vma, head, exec_node) { + if (vma->exec_handle == handle) + return vma; + } + return NULL; + } +} + +static void eb_release_vmas(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + + for (i = 0; i < count; i++) { + struct i915_vma *vma = eb->vma[i]; + unsigned int flags = eb->flags[i]; + + if (!vma) + break; + + GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); + vma->exec_flags = NULL; + eb->vma[i] = NULL; + + if (flags & __EXEC_OBJECT_HAS_PIN) + __eb_unreserve_vma(vma, flags); + + if (flags & __EXEC_OBJECT_HAS_REF) + i915_vma_put(vma); + } +} + +static void eb_reset_vmas(const struct i915_execbuffer *eb) +{ + eb_release_vmas(eb); + if (eb->lut_size > 0) + memset(eb->buckets, 0, + sizeof(struct hlist_head) << eb->lut_size); +} + +static void eb_destroy(const struct i915_execbuffer *eb) +{ + GEM_BUG_ON(eb->reloc_cache.rq); + + if (eb->lut_size > 0) + kfree(eb->buckets); +} + +static inline u64 +relocation_target(const struct drm_i915_gem_relocation_entry *reloc, + const struct i915_vma *target) +{ + return gen8_canonical_addr((int)reloc->delta + target->node.start); +} + +static void reloc_cache_init(struct reloc_cache *cache, + struct drm_i915_private *i915) +{ + cache->page = -1; + cache->vaddr = 0; + /* Must be a variable in the struct to allow GCC to unroll. */ + cache->gen = INTEL_GEN(i915); + cache->has_llc = HAS_LLC(i915); + cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); + cache->has_fence = cache->gen < 4; + cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; + cache->node.allocated = false; + cache->rq = NULL; + cache->rq_size = 0; +} + +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} + +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) +{ + struct drm_i915_private *i915 = + container_of(cache, struct i915_execbuffer, reloc_cache)->i915; + return &i915->ggtt; +} + +static void reloc_gpu_flush(struct reloc_cache *cache) +{ + GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); + i915_gem_object_unpin_map(cache->rq->batch->obj); + + i915_gem_chipset_flush(cache->rq->i915); + + i915_request_add(cache->rq); + cache->rq = NULL; +} + +static void reloc_cache_reset(struct reloc_cache *cache) +{ + void *vaddr; + + if (cache->rq) + reloc_gpu_flush(cache); + + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); + + kunmap_atomic(vaddr); + i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + wmb(); + io_mapping_unmap_atomic((void __iomem *)vaddr); + if (cache->node.allocated) { + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + + ggtt->vm.clear_range(&ggtt->vm, + cache->node.start, + cache->node.size); + drm_mm_remove_node(&cache->node); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); + } + } + + cache->vaddr = 0; + cache->page = -1; +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + 
unsigned long page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int err; + + err = i915_gem_object_prepare_write(obj, &flushes); + if (err) + return ERR_PTR(err); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); + } + + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; + + return vaddr; +} + +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + struct i915_ggtt *ggtt = cache_to_ggtt(cache); + unsigned long offset; + void *vaddr; + + if (cache->vaddr) { + io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int err; + + if (use_cpu_reloc(cache, obj)) + return NULL; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return ERR_PTR(err); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + err = drm_mm_insert_node_in_range + (&ggtt->vm.mm, &cache->node, + PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) /* no inactive aperture space, use cpu reloc */ + return NULL; + } else { + err = i915_vma_put_fence(vma); + if (err) { + i915_vma_unpin(vma); + return ERR_PTR(err); + } + + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; + } + } + + offset = cache->node.start; + if (cache->node.allocated) { + wmb(); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; + } + + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, + offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; + + return vaddr; +} + +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + unsigned long page) +{ + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, cache, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; +} + +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) +{ + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } + + *addr = value; + + /* + * Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. 
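+ *
+ * e.g. for a write into a non-coherent bo: CLFLUSH_BEFORE evicts any
+ * stale cacheline before we store the new value, CLFLUSH_AFTER pushes
+ * that value out to memory, and the mb()/wmb() in reloc_cache_reset()
+ * fences the whole sequence (a sketch of the intent, not a measured
+ * ordering proof).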
+ */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} + +static int __reloc_gpu_alloc(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_vma *batch; + u32 *cmd; + int err; + + if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) { + obj = vma->obj; + if (obj->cache_dirty & ~obj->cache_coherent) + i915_gem_clflush_object(obj, 0); + obj->write_domain = 0; + } + + GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); + + obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, + cache->has_llc ? + I915_MAP_FORCE_WB : + I915_MAP_FORCE_WC); + i915_gem_object_unpin_pages(obj); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_unmap; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto err_unmap; + + rq = i915_request_create(eb->context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = i915_request_await_object(rq, vma->obj, true); + if (err) + goto err_request; + + err = eb->engine->emit_bb_start(rq, + batch->node.start, PAGE_SIZE, + cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); + if (err) + goto err_request; + + GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + rq->batch = batch; + i915_vma_unpin(batch); + + cache->rq = rq; + cache->rq_cmd = cmd; + cache->rq_size = 0; + + /* Return with batch mapping (cmd) still pinned */ + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(batch); +err_unmap: + i915_gem_object_unpin_map(obj); + return err; +} + +static u32 *reloc_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + unsigned int len) +{ + struct reloc_cache *cache = &eb->reloc_cache; + u32 *cmd; + + if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) + reloc_gpu_flush(cache); + + if (unlikely(!cache->rq)) { + int err; + + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + + if (!intel_engine_can_store_dword(eb->engine)) + return ERR_PTR(-ENODEV); + + err = __reloc_gpu_alloc(eb, vma, len); + if (unlikely(err)) + return ERR_PTR(err); + } + + cmd = cache->rq_cmd + cache->rq_size; + cache->rq_size += len; + + return cmd; +} + +static u64 +relocate_entry(struct i915_vma *vma, + const struct drm_i915_gem_relocation_entry *reloc, + struct i915_execbuffer *eb, + const struct i915_vma *target) +{ + u64 offset = reloc->offset; + u64 target_offset = relocation_target(reloc, target); + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; + + if (!eb->reloc_cache.vaddr && + (DBG_FORCE_RELOC == FORCE_GPU_RELOC || + !reservation_object_test_signaled_rcu(vma->resv, true))) { + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (wide) + len = offset & 7 ? 
8 : 5;
+ else if (gen >= 4)
+ len = 4;
+ else
+ len = 3;
+
+ batch = reloc_gpu(eb, vma, len);
+ if (IS_ERR(batch))
+ goto repeat;
+
+ addr = gen8_canonical_addr(vma->node.start + offset);
+ if (wide) {
+ if (offset & 7) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_offset);
+
+ addr = gen8_canonical_addr(addr + 4);
+
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = upper_32_bits(target_offset);
+ } else {
+ *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+ *batch++ = lower_32_bits(addr);
+ *batch++ = upper_32_bits(addr);
+ *batch++ = lower_32_bits(target_offset);
+ *batch++ = upper_32_bits(target_offset);
+ }
+ } else if (gen >= 6) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_offset;
+ } else if (gen >= 4) {
+ *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *batch++ = 0;
+ *batch++ = addr;
+ *batch++ = target_offset;
+ } else {
+ *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *batch++ = addr;
+ *batch++ = target_offset;
+ }
+
+ goto out;
+ }
+
+repeat:
+ vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_offset),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_offset >>= 32;
+ wide = false;
+ goto repeat;
+ }
+
+out:
+ return target->node.start | UPDATE;
+}
+
+static u64
+eb_relocate_entry(struct i915_execbuffer *eb,
+ struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc)
+{
+ struct i915_vma *target;
+ int err;
+
+ /* we already hold a reference to all valid objects */
+ target = eb_get_vma(eb, reloc->target_handle);
+ if (unlikely(!target))
+ return -ENOENT;
+
+ /* Validate that the target is in a valid r/w GPU domain */
+ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
+ DRM_DEBUG("reloc with multiple write domains: "
+ "target %d offset %d "
+ "read %08x write %08x",
+ reloc->target_handle,
+ (int) reloc->offset,
+ reloc->read_domains,
+ reloc->write_domain);
+ return -EINVAL;
+ }
+ if (unlikely((reloc->write_domain | reloc->read_domains)
+ & ~I915_GEM_GPU_DOMAINS)) {
+ DRM_DEBUG("reloc with read/write non-GPU domains: "
+ "target %d offset %d "
+ "read %08x write %08x",
+ reloc->target_handle,
+ (int) reloc->offset,
+ reloc->read_domains,
+ reloc->write_domain);
+ return -EINVAL;
+ }
+
+ if (reloc->write_domain) {
+ *target->exec_flags |= EXEC_OBJECT_WRITE;
+
+ /*
+ * Sandybridge PPGTT errata: We need a global gtt mapping
+ * for MI and pipe_control writes because the gpu doesn't
+ * properly redirect them through the ppgtt for non_secure
+ * batchbuffers.
+ */
+ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+ IS_GEN(eb->i915, 6)) {
+ err = i915_vma_bind(target, target->obj->cache_level,
+ PIN_GLOBAL);
+ if (WARN_ONCE(err,
+ "Unexpected failure to bind target VMA!"))
+ return err;
+ }
+ }
+
+ /*
+ * If the relocation already has the right value in it, no
+ * more work needs to be done.
+ */
+ if (!DBG_FORCE_RELOC &&
+ gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+ return 0;
+
+ /* Check that the relocation address is valid... */
+ if (unlikely(reloc->offset >
+ vma->size - (eb->reloc_cache.use_64bit_reloc ?
8 : 4))) {
+ DRM_DEBUG("Relocation beyond object bounds: "
+ "target %d offset %d size %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset,
+ (int)vma->size);
+ return -EINVAL;
+ }
+ if (unlikely(reloc->offset & 3)) {
+ DRM_DEBUG("Relocation not 4-byte aligned: "
+ "target %d offset %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset);
+ return -EINVAL;
+ }
+
+ /*
+ * If we write into the object, we need to force the synchronisation
+ * barrier, either with an asynchronous clflush or if we executed the
+ * patching using the GPU (though that should be serialised by the
+ * timeline). To be completely sure, and since we are required to
+ * do relocations we are already stalling, disable the user's opt
+ * out of our synchronisation.
+ */
+ *vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
+
+ /* and update the user's relocation entry */
+ return relocate_entry(vma, reloc, eb, target);
+}
+
+static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+ struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+ unsigned int remain;
+
+ urelocs = u64_to_user_ptr(entry->relocs_ptr);
+ remain = entry->relocation_count;
+ if (unlikely(remain > N_RELOC(ULONG_MAX)))
+ return -EINVAL;
+
+ /*
+ * We must check that the entire relocation array is safe
+ * to read. However, if the array is not writable the user loses
+ * the updated relocation values.
+ */
+ if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
+ return -EFAULT;
+
+ do {
+ struct drm_i915_gem_relocation_entry *r = stack;
+ unsigned int count =
+ min_t(unsigned int, remain, ARRAY_SIZE(stack));
+ unsigned int copied;
+
+ /*
+ * This is the fast path and we cannot handle a pagefault
+ * whilst holding the struct mutex lest the user pass in the
+ * relocations contained within an mmaped bo. In such a case
+ * the page fault handler would call i915_gem_fault() and
+ * we would try to acquire the struct mutex again. Obviously
+ * this is bad and so lockdep complains vehemently.
+ */
+ pagefault_disable();
+ copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+ pagefault_enable();
+ if (unlikely(copied)) {
+ remain = -EFAULT;
+ goto out;
+ }
+
+ remain -= count;
+ do {
+ u64 offset = eb_relocate_entry(eb, vma, r);
+
+ if (likely(offset == 0)) {
+ } else if ((s64)offset < 0) {
+ remain = (int)offset;
+ goto out;
+ } else {
+ /*
+ * Note that reporting an error now
+ * leaves everything in an inconsistent
+ * state as we have *already* changed
+ * the relocation value inside the
+ * object. As we have not changed the
+ * reloc.presumed_offset, and will not
+ * change the execobject.offset, on the
+ * next call we may not rewrite the value
+ * inside the object, leaving it
+ * dangling and causing a GPU hang. Unless
+ * userspace dynamically rebuilds the
+ * relocations on each execbuf rather than
+ * presume a static tree.
+ *
+ * We did previously check if the relocations
+ * were writable (access_ok), an error now
+ * would be a strange race with mprotect,
+ * having already demonstrated that we
+ * can read from this userspace address.
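+ *
+ * (i.e. should the target later return to its old presumed_offset,
+ * the match would suppress the rewrite while the object still holds
+ * the value we patched in here - hence "dangling".)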
+ */ + offset = gen8_canonical_addr(offset & ~UPDATE); + if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { + remain = -EFAULT; + goto out; + } + } + } while (r++, --count); + urelocs += ARRAY_SIZE(stack); + } while (remain); +out: + reloc_cache_reset(&eb->reloc_cache); + return remain; +} + +static int +eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) +{ + const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); + struct drm_i915_gem_relocation_entry *relocs = + u64_to_ptr(typeof(*relocs), entry->relocs_ptr); + unsigned int i; + int err; + + for (i = 0; i < entry->relocation_count; i++) { + u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); + + if ((s64)offset < 0) { + err = (int)offset; + goto err; + } + } + err = 0; +err: + reloc_cache_reset(&eb->reloc_cache); + return err; +} + +static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) +{ + const char __user *addr, *end; + unsigned long size; + char __maybe_unused c; + + size = entry->relocation_count; + if (size == 0) + return 0; + + if (size > N_RELOC(ULONG_MAX)) + return -EINVAL; + + addr = u64_to_user_ptr(entry->relocs_ptr); + size *= sizeof(struct drm_i915_gem_relocation_entry); + if (!access_ok(addr, size)) + return -EFAULT; + + end = addr + size; + for (; addr < end; addr += PAGE_SIZE) { + int err = __get_user(c, addr); + if (err) + return err; + } + return __get_user(c, end - 1); +} + +static int eb_copy_relocations(const struct i915_execbuffer *eb) +{ + const unsigned int count = eb->buffer_count; + unsigned int i; + int err; + + for (i = 0; i < count; i++) { + const unsigned int nreloc = eb->exec[i].relocation_count; + struct drm_i915_gem_relocation_entry __user *urelocs; + struct drm_i915_gem_relocation_entry *relocs; + unsigned long size; + unsigned long copied; + + if (nreloc == 0) + continue; + + err = check_relocations(&eb->exec[i]); + if (err) + goto err; + + urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); + size = nreloc * sizeof(*relocs); + + relocs = kvmalloc_array(size, 1, GFP_KERNEL); + if (!relocs) { + err = -ENOMEM; + goto err; + } + + /* copy_from_user is limited to < 4GiB */ + copied = 0; + do { + unsigned int len = + min_t(u64, BIT_ULL(31), size - copied); + + if (__copy_from_user((char *)relocs + copied, + (char __user *)urelocs + copied, + len)) { +end_user: + user_access_end(); +end: + kvfree(relocs); + err = -EFAULT; + goto err; + } + + copied += len; + } while (copied < size); + + /* + * As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. 
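+ *
+ * The -1 written below can never equal gen8_canonical_addr() of a
+ * page-aligned drm_mm node, so the presumed_offset comparison in
+ * eb_relocate_entry() is guaranteed to force a rewrite.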
+ */
+ if (!user_access_begin(urelocs, size))
+ goto end;
+
+ for (copied = 0; copied < nreloc; copied++)
+ unsafe_put_user(-1,
+ &urelocs[copied].presumed_offset,
+ end_user);
+ user_access_end();
+
+ eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+ }
+
+ return 0;
+
+err:
+ while (i--) {
+ struct drm_i915_gem_relocation_entry *relocs =
+ u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+ if (eb->exec[i].relocation_count)
+ kvfree(relocs);
+ }
+ return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ if (unlikely(i915_modparams.prefault_disable))
+ return 0;
+
+ for (i = 0; i < count; i++) {
+ int err;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+{
+ struct drm_device *dev = &eb->i915->drm;
+ bool have_copy = false;
+ struct i915_vma *vma;
+ int err = 0;
+
+repeat:
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* We may process another execbuffer during the unlock... */
+ eb_reset_vmas(eb);
+ mutex_unlock(&dev->struct_mutex);
+
+ /*
+ * We take 3 passes through the slowpath.
+ *
+ * 1 - we try to just prefault all the user relocation entries and
+ * then attempt to reuse the atomic pagefault disabled fast path again.
+ *
+ * 2 - we copy the user entries to a local buffer here outside of the
+ * lock and allow ourselves to wait upon any rendering before
+ * applying the relocations.
+ *
+ * 3 - we already have a local copy of the relocation entries, but
+ * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+ */
+ if (!err) {
+ err = eb_prefault_relocations(eb);
+ } else if (!have_copy) {
+ err = eb_copy_relocations(eb);
+ have_copy = err == 0;
+ } else {
+ cond_resched();
+ err = 0;
+ }
+ if (err) {
+ mutex_lock(&dev->struct_mutex);
+ goto out;
+ }
+
+ /* A frequent cause for EAGAIN is currently unavailable client pages */
+ flush_workqueue(eb->i915->mm.userptr_wq);
+
+ err = i915_mutex_lock_interruptible(dev);
+ if (err) {
+ mutex_lock(&dev->struct_mutex);
+ goto out;
+ }
+
+ /* reacquire the objects */
+ err = eb_lookup_vmas(eb);
+ if (err)
+ goto err;
+
+ GEM_BUG_ON(!eb->batch);
+
+ list_for_each_entry(vma, &eb->relocs, reloc_link) {
+ if (!have_copy) {
+ pagefault_disable();
+ err = eb_relocate_vma(eb, vma);
+ pagefault_enable();
+ if (err)
+ goto repeat;
+ } else {
+ err = eb_relocate_vma_slow(eb, vma);
+ if (err)
+ goto err;
+ }
+ }
+
+ /*
+ * Leave the user relocations as they are, this is the painfully slow
+ * path, and we want to avoid the complication of dropping the lock
+ * whilst having buffers reserved in the aperture and so causing
+ * spurious ENOSPC for random operations.
+ */
+
+err:
+ if (err == -EAGAIN)
+ goto repeat;
+
+out:
+ if (have_copy) {
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry =
+ &eb->exec[i];
+ struct drm_i915_gem_relocation_entry *relocs;
+
+ if (!entry->relocation_count)
+ continue;
+
+ relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ kvfree(relocs);
+ }
+ }
+
+ return err;
+}
+
+static int eb_relocate(struct i915_execbuffer *eb)
+{
+ if (eb_lookup_vmas(eb))
+ goto slow;
+
+ /* The objects are in their final locations, apply the relocations.
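+ * Only vmas with a non-zero relocation_count were placed on
+ * eb->relocs by eb_add_vma(), and a NO_RELOC submission only gains
+ * __EXEC_HAS_RELOC when an object had to move, so the common path
+ * skips this walk entirely.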
*/
+ if (eb->args->flags & __EXEC_HAS_RELOC) {
+ struct i915_vma *vma;
+
+ list_for_each_entry(vma, &eb->relocs, reloc_link) {
+ if (eb_relocate_vma(eb, vma))
+ goto slow;
+ }
+ }
+
+ return 0;
+
+slow:
+ return eb_relocate_slow(eb);
+}
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < count; i++) {
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ if (flags & EXEC_OBJECT_CAPTURE) {
+ struct i915_capture_list *capture;
+
+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+ if (unlikely(!capture))
+ return -ENOMEM;
+
+ capture->next = eb->request->capture_list;
+ capture->vma = eb->vma[i];
+ eb->request->capture_list = capture;
+ }
+
+ /*
+ * If the GPU is not _reading_ through the CPU cache, we need
+ * to make sure that any writes (both previous GPU writes from
+ * before a change in snooping levels and normal CPU writes)
+ * caught in that cache are flushed to main memory.
+ *
+ * We want to say
+ * obj->cache_dirty &&
+ * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+ * but gcc's optimiser doesn't handle that as well and emits
+ * two jumps instead of one. Maybe one day...
+ */
+ if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
+ if (i915_gem_clflush_object(obj, 0))
+ flags &= ~EXEC_OBJECT_ASYNC;
+ }
+
+ if (flags & EXEC_OBJECT_ASYNC)
+ continue;
+
+ err = i915_request_await_object
+ (eb->request, obj, flags & EXEC_OBJECT_WRITE);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int flags = eb->flags[i];
+ struct i915_vma *vma = eb->vma[i];
+
+ err = i915_vma_move_to_active(vma, eb->request, flags);
+ if (unlikely(err)) {
+ i915_request_skip(eb->request, err);
+ return err;
+ }
+
+ __eb_unreserve_vma(vma, flags);
+ vma->exec_flags = NULL;
+
+ if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
+ i915_vma_put(vma);
+ }
+ eb->exec = NULL;
+
+ /* Unconditionally flush any chipset caches (for streaming writes).
*/ + i915_gem_chipset_flush(eb->i915); + + return 0; +} + +static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +{ + if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) + return false; + + /* Kernel clipping was a DRI1 misfeature */ + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (exec->num_cliprects || exec->cliprects_ptr) + return false; + } + + if (exec->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + exec->DR4 = 0; + } + if (exec->DR1 || exec->DR4) + return false; + + if ((exec->batch_start_offset | exec->batch_len) & 0x7) + return false; + + return true; +} + +static int i915_reset_gen7_sol_offsets(struct i915_request *rq) +{ + u32 *cs; + int i; + + if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { + DRM_DEBUG("sol reset is gen7/rcs only\n"); + return -EINVAL; + } + + cs = intel_ring_begin(rq, 4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(4); + for (i = 0; i < 4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; + } + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +{ + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; + int err; + + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, + PAGE_ALIGN(eb->batch_len)); + if (IS_ERR(shadow_batch_obj)) + return ERR_CAST(shadow_batch_obj); + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->obj, + shadow_batch_obj, + eb->batch_start_offset, + eb->batch_len, + is_master); + if (err) { + if (err == -EACCES) /* unhandled chained batch */ + vma = NULL; + else + vma = ERR_PTR(err); + goto out; + } + + vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + goto out; + + eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->flags[eb->buffer_count] = + __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; + vma->exec_flags = &eb->flags[eb->buffer_count]; + eb->buffer_count++; + +out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; +} + +static void +add_to_client(struct i915_request *rq, struct drm_file *file) +{ + rq->file_priv = file->driver_priv; + list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); +} + +static int eb_submit(struct i915_execbuffer *eb) +{ + int err; + + err = eb_move_to_gpu(eb); + if (err) + return err; + + if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { + err = i915_reset_gen7_sol_offsets(eb->request); + if (err) + return err; + } + + /* + * After we completed waiting for other engines (using HW semaphores) + * then we can signal that this request/batch is ready to run. This + * allows us to determine if the batch is still waiting on the GPU + * or actually running by checking the breadcrumb. + */ + if (eb->engine->emit_init_breadcrumb) { + err = eb->engine->emit_init_breadcrumb(eb->request); + if (err) + return err; + } + + err = eb->engine->emit_bb_start(eb->request, + eb->batch->node.start + + eb->batch_start_offset, + eb->batch_len, + eb->batch_flags); + if (err) + return err; + + return 0; +} + +/* + * Find one BSD ring to dispatch the corresponding BSD command. + * The engine index is returned. + */ +static unsigned int +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) +{ + struct drm_i915_file_private *file_priv = file->driver_priv; + + /* Check whether the file_priv has already selected one ring. 
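+ * If it has not, the atomic_fetch_xor(1, ...) below ping-pongs the
+ * shared dispatch index between 0 and 1, spreading new clients over
+ * VCS0/VCS1 as a crude, stateless load balancer.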
*/ + if ((int)file_priv->bsd_engine < 0) + file_priv->bsd_engine = atomic_fetch_xor(1, + &dev_priv->mm.bsd_engine_dispatch_index); + + return file_priv->bsd_engine; +} + +static const enum intel_engine_id user_ring_map[] = { + [I915_EXEC_DEFAULT] = RCS0, + [I915_EXEC_RENDER] = RCS0, + [I915_EXEC_BLT] = BCS0, + [I915_EXEC_BSD] = VCS0, + [I915_EXEC_VEBOX] = VECS0 +}; + +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + int err; + + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = i915_terminally_wedged(eb->i915); + if (err) + return err; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + err = intel_context_pin(ce); + if (err) + return err; + + eb->engine = ce->engine; + eb->context = ce; + return 0; +} + +static void eb_unpin_context(struct i915_execbuffer *eb) +{ + intel_context_unpin(eb->context); +} + +static unsigned int +eb_select_legacy_ring(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct drm_i915_private *i915 = eb->i915; + unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + + if (user_ring_id != I915_EXEC_BSD && + (args->flags & I915_EXEC_BSD_MASK)) { + DRM_DEBUG("execbuf with non bsd ring but with invalid " + "bsd dispatch flags: %d\n", (int)(args->flags)); + return -1; + } + + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { + unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; + + if (bsd_idx == I915_EXEC_BSD_DEFAULT) { + bsd_idx = gen8_dispatch_bsd_engine(i915, file); + } else if (bsd_idx >= I915_EXEC_BSD_RING1 && + bsd_idx <= I915_EXEC_BSD_RING2) { + bsd_idx >>= I915_EXEC_BSD_SHIFT; + bsd_idx--; + } else { + DRM_DEBUG("execbuf with unknown bsd ring: %u\n", + bsd_idx); + return -1; + } + + return _VCS(bsd_idx); + } + + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + return -1; + } + + return user_ring_map[user_ring_id]; +} + +static int +eb_select_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct intel_context *ce; + unsigned int idx; + int err; + + if (i915_gem_context_user_engines(eb->gem_context)) + idx = args->flags & I915_EXEC_RING_MASK; + else + idx = eb_select_legacy_ring(eb, file, args); + + ce = i915_gem_context_get_engine(eb->gem_context, idx); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = eb_pin_context(eb, ce); + intel_context_put(ce); + + return err; +} + +static void +__free_fence_array(struct drm_syncobj **fences, unsigned int n) +{ + while (n--) + drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + kvfree(fences); +} + +static struct drm_syncobj ** +get_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct drm_file *file) +{ + const unsigned long nfences = args->num_cliprects; + struct drm_i915_gem_exec_fence __user *user; + struct drm_syncobj **fences; + unsigned long n; + int err; + + if (!(args->flags & I915_EXEC_FENCE_ARRAY)) + return NULL; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (nfences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user = u64_to_user_ptr(args->cliprects_ptr); + if (!access_ok(user, nfences * sizeof(*user))) + return ERR_PTR(-EFAULT); + + fences 
= kvmalloc_array(nfences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + for (n = 0; n < nfences; n++) { + struct drm_i915_gem_exec_fence fence; + struct drm_syncobj *syncobj; + + if (__copy_from_user(&fence, user++, sizeof(fence))) { + err = -EFAULT; + goto err; + } + + if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + syncobj = drm_syncobj_find(file, fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -ENOENT; + goto err; + } + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + } + + return fences; + +err: + __free_fence_array(fences, n); + return ERR_PTR(err); +} + +static void +put_fence_array(struct drm_i915_gem_execbuffer2 *args, + struct drm_syncobj **fences) +{ + if (fences) + __free_fence_array(fences, args->num_cliprects); +} + +static int +await_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + unsigned int n; + int err; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + struct dma_fence *fence; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_WAIT)) + continue; + + fence = drm_syncobj_fence_get(syncobj); + if (!fence) + return -EINVAL; + + err = i915_request_await_dma_fence(eb->request, fence); + dma_fence_put(fence); + if (err < 0) + return err; + } + + return 0; +} + +static void +signal_fence_array(struct i915_execbuffer *eb, + struct drm_syncobj **fences) +{ + const unsigned int nfences = eb->args->num_cliprects; + struct dma_fence * const fence = &eb->request->fence; + unsigned int n; + + for (n = 0; n < nfences; n++) { + struct drm_syncobj *syncobj; + unsigned int flags; + + syncobj = ptr_unpack_bits(fences[n], &flags, 2); + if (!(flags & I915_EXEC_FENCE_SIGNAL)) + continue; + + drm_syncobj_replace_fence(syncobj, fence); + } +} + +static int +i915_gem_do_execbuffer(struct drm_device *dev, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec, + struct drm_syncobj **fences) +{ + struct i915_execbuffer eb; + struct dma_fence *in_fence = NULL; + struct dma_fence *exec_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; + int err; + + BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & + ~__EXEC_OBJECT_UNKNOWN_FLAGS); + + eb.i915 = to_i915(dev); + eb.file = file; + eb.args = args; + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) + args->flags |= __EXEC_HAS_RELOC; + + eb.exec = exec; + eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); + eb.vma[0] = NULL; + eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); + + eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + reloc_cache_init(&eb.reloc_cache, eb.i915); + + eb.buffer_count = args->buffer_count; + eb.batch_start_offset = args->batch_start_offset; + eb.batch_len = args->batch_len; + + eb.batch_flags = 0; + if (args->flags & I915_EXEC_SECURE) { + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) + return -EPERM; + + eb.batch_flags |= I915_DISPATCH_SECURE; + } + if (args->flags & I915_EXEC_IS_PINNED) + eb.batch_flags |= I915_DISPATCH_PINNED; + + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) + return -EINVAL; + } 
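+
+ /*
+ * Note that I915_EXEC_FENCE_IN and I915_EXEC_FENCE_SUBMIT both decode
+ * lower_32_bits(rsvd2) as their fd, which is why the two flags are
+ * rejected as mutually exclusive below; the upper half of rsvd2 is
+ * reserved for returning the out-fence fd.
+ */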
+ + if (args->flags & I915_EXEC_FENCE_SUBMIT) { + if (in_fence) { + err = -EINVAL; + goto err_in_fence; + } + + exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!exec_fence) { + err = -EINVAL; + goto err_in_fence; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + err = out_fence_fd; + goto err_exec_fence; + } + } + + err = eb_create(&eb); + if (err) + goto err_out_fence; + + GEM_BUG_ON(!eb.lut_size); + + err = eb_select_context(&eb); + if (unlikely(err)) + goto err_destroy; + + /* + * Take a local wakeref for preparing to dispatch the execbuf as + * we expect to access the hardware fairly frequently in the + * process. Upon first dispatch, we acquire another prolonged + * wakeref that we hold until the GPU has been idle for at least + * 100ms. + */ + intel_gt_pm_get(eb.i915); + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto err_rpm; + + err = eb_select_engine(&eb, file, args); + if (unlikely(err)) + goto err_unlock; + + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_engine; + + err = eb_relocate(&eb); + if (err) { + /* + * If the user expects the execobject.offset and + * reloc.presumed_offset to be an exact match, + * as for using NO_RELOC, then we cannot update + * the execobject.offset until we have completed + * relocation. + */ + args->flags &= ~__EXEC_HAS_RELOC; + goto err_vma; + } + + if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) { + DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); + err = -EINVAL; + goto err_vma; + } + if (eb.batch_start_offset > eb.batch->size || + eb.batch_len > eb.batch->size - eb.batch_start_offset) { + DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + err = -EINVAL; + goto err_vma; + } + + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + + vma = eb_parse(&eb, drm_is_current_master(file)); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; + } + + if (vma) { + /* + * Batch parsed and accepted: + * + * Set the DISPATCH_SECURE bit to remove the NON_SECURE + * bit from MI_BATCH_BUFFER_START commands issued in + * the dispatch_execbuffer implementations. We + * specifically don't want that set on batches the + * command parser has accepted. + */ + eb.batch_flags |= I915_DISPATCH_SECURE; + eb.batch_start_offset = 0; + eb.batch = vma; + } + } + + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. + * hsw should have this fixed, but bdw mucks it up again. */ + if (eb.batch_flags & I915_DISPATCH_SECURE) { + struct i915_vma *vma; + + /* + * So on first glance it looks freaky that we pin the batch here + * outside of the reservation loop. But: + * - The batch is already pinned into the relevant ppgtt, so we + * already have the backing storage fully allocated. + * - No other BO uses the global gtt (well contexts, but meh), + * so we don't really have issues with multiple objects not + * fitting due to fragmentation. + * So this is actually safe. + */ + vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; + } + + eb.batch = vma; + } + + /* All GPU relocation batches must be submitted prior to the user rq */ + GEM_BUG_ON(eb.reloc_cache.rq); + + /* Allocate a request for this batch buffer nice and early. 
*/
+ eb.request = i915_request_create(eb.context);
+ if (IS_ERR(eb.request)) {
+ err = PTR_ERR(eb.request);
+ goto err_batch_unpin;
+ }
+
+ if (in_fence) {
+ err = i915_request_await_dma_fence(eb.request, in_fence);
+ if (err < 0)
+ goto err_request;
+ }
+
+ if (exec_fence) {
+ err = i915_request_await_execution(eb.request, exec_fence,
+ eb.engine->bond_execute);
+ if (err < 0)
+ goto err_request;
+ }
+
+ if (fences) {
+ err = await_fence_array(&eb, fences);
+ if (err)
+ goto err_request;
+ }
+
+ if (out_fence_fd != -1) {
+ out_fence = sync_file_create(&eb.request->fence);
+ if (!out_fence) {
+ err = -ENOMEM;
+ goto err_request;
+ }
+ }
+
+ /*
+ * Whilst this request exists, batch_obj will be on the
+ * active_list, and so will hold the active reference. Only when this
+ * request is retired will the batch_obj be moved onto the
+ * inactive_list and lose its active reference. Hence we do not need
+ * to explicitly hold another reference here.
+ */
+ eb.request->batch = eb.batch;
+
+ trace_i915_request_queue(eb.request, eb.batch_flags);
+ err = eb_submit(&eb);
+err_request:
+ add_to_client(eb.request, file);
+ i915_request_add(eb.request);
+
+ if (fences)
+ signal_fence_array(&eb, fences);
+
+ if (out_fence) {
+ if (err == 0) {
+ fd_install(out_fence_fd, out_fence->file);
+ args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+ args->rsvd2 |= (u64)out_fence_fd << 32;
+ out_fence_fd = -1;
+ } else {
+ fput(out_fence->file);
+ }
+ }
+
+err_batch_unpin:
+ if (eb.batch_flags & I915_DISPATCH_SECURE)
+ i915_vma_unpin(eb.batch);
+err_vma:
+ if (eb.exec)
+ eb_release_vmas(&eb);
+err_engine:
+ eb_unpin_context(&eb);
+err_unlock:
+ mutex_unlock(&dev->struct_mutex);
+err_rpm:
+ intel_gt_pm_put(eb.i915);
+ i915_gem_context_put(eb.gem_context);
+err_destroy:
+ eb_destroy(&eb);
+err_out_fence:
+ if (out_fence_fd != -1)
+ put_unused_fd(out_fence_fd);
+err_exec_fence:
+ dma_fence_put(exec_fence);
+err_in_fence:
+ dma_fence_put(in_fence);
+ return err;
+}
+
+static size_t eb_element_size(void)
+{
+ return (sizeof(struct drm_i915_gem_exec_object2) +
+ sizeof(struct i915_vma *) +
+ sizeof(unsigned int));
+}
+
+static bool check_buffer_count(size_t count)
+{
+ const size_t sz = eb_element_size();
+
+ /*
+ * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
+ * array size (see eb_create()). Otherwise, we can accept an array as
+ * large as can be addressed (though use large arrays at your peril)!
+ */
+
+ return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
+}
+
+/*
+ * Legacy execbuffer just creates an exec2 list from the original exec object
+ * list array and passes it to the real function.
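+ * The one translation of note: for INTEL_GEN < 4 every entry is
+ * marked EXEC_OBJECT_NEEDS_FENCE below, presumably because the
+ * legacy ABI predates per-object fence flags.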
+ */ +int +i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + const size_t count = args->buffer_count; + unsigned int i; + int err; + + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); + return -EINVAL; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = I915_EXEC_RENDER; + i915_execbuffer2_set_context_id(exec2, 0); + + if (!i915_gem_check_execbuffer(&exec2)) + return -EINVAL; + + /* Copy in the exec list from userland */ + exec_list = kvmalloc_array(count, sizeof(*exec_list), + __GFP_NOWARN | GFP_KERNEL); + exec2_list = kvmalloc_array(count + 1, eb_element_size(), + __GFP_NOWARN | GFP_KERNEL); + if (exec_list == NULL || exec2_list == NULL) { + DRM_DEBUG("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + kvfree(exec_list); + kvfree(exec2_list); + return -ENOMEM; + } + err = copy_from_user(exec_list, + u64_to_user_ptr(args->buffers_ptr), + sizeof(*exec_list) * count); + if (err) { + DRM_DEBUG("copy %d exec entries failed %d\n", + args->buffer_count, err); + kvfree(exec_list); + kvfree(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (INTEL_GEN(to_i915(dev)) < 4) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + if (exec2.flags & __EXEC_HAS_RELOC) { + struct drm_i915_gem_exec_object __user *user_exec_list = + u64_to_user_ptr(args->buffers_ptr); + + /* Copy the new buffer offsets back to the user's exec list. 
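+ * Only entries whose offset carries the UPDATE bit actually moved;
+ * the bit itself is stripped via PIN_OFFSET_MASK before copy-out.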
*/ + for (i = 0; i < args->buffer_count; i++) { + if (!(exec2_list[i].offset & UPDATE)) + continue; + + exec2_list[i].offset = + gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); + exec2_list[i].offset &= PIN_OFFSET_MASK; + if (__copy_to_user(&user_exec_list[i].offset, + &exec2_list[i].offset, + sizeof(user_exec_list[i].offset))) + break; + } + } + + kvfree(exec_list); + kvfree(exec2_list); + return err; +} + +int +i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list; + struct drm_syncobj **fences = NULL; + const size_t count = args->buffer_count; + int err; + + if (!check_buffer_count(count)) { + DRM_DEBUG("execbuf2 with %zd buffers\n", count); + return -EINVAL; + } + + if (!i915_gem_check_execbuffer(args)) + return -EINVAL; + + /* Allocate an extra slot for use by the command parser */ + exec2_list = kvmalloc_array(count + 1, eb_element_size(), + __GFP_NOWARN | GFP_KERNEL); + if (exec2_list == NULL) { + DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", + count); + return -ENOMEM; + } + if (copy_from_user(exec2_list, + u64_to_user_ptr(args->buffers_ptr), + sizeof(*exec2_list) * count)) { + DRM_DEBUG("copy %zd exec entries failed\n", count); + kvfree(exec2_list); + return -EFAULT; + } + + if (args->flags & I915_EXEC_FENCE_ARRAY) { + fences = get_fence_array(args, file); + if (IS_ERR(fences)) { + kvfree(exec2_list); + return PTR_ERR(fences); + } + } + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + + /* + * Now that we have begun execution of the batchbuffer, we ignore + * any new error after this point. Also given that we have already + * updated the associated relocations, we try to write out the current + * object locations irrespective of any error. + */ + if (args->flags & __EXEC_HAS_RELOC) { + struct drm_i915_gem_exec_object2 __user *user_exec_list = + u64_to_user_ptr(args->buffers_ptr); + unsigned int i; + + /* Copy the new buffer offsets back to the user's exec list. */ + /* + * Note: count * sizeof(*user_exec_list) does not overflow, + * because we checked 'count' in check_buffer_count(). + * + * And this range already got effectively checked earlier + * when we did the "copy_from_user()" above. 
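+ * (On x86 at least, user_access_begin() repeats its own access_ok()
+ * before we enter the unsafe_put_user() loop.)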
+ */
+ if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+ goto end;
+
+ for (i = 0; i < args->buffer_count; i++) {
+ if (!(exec2_list[i].offset & UPDATE))
+ continue;
+
+ exec2_list[i].offset =
+ gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+ unsafe_put_user(exec2_list[i].offset,
+ &user_exec_list[i].offset,
+ end_user);
+ }
+end_user:
+ user_access_end();
+end:;
+ }
+
+ args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
+ put_fence_array(args, fences);
+ kvfree(exec2_list);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
new file mode 100644
index 000000000000..85a05a2435e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -0,0 +1,197 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_object.h"
+#include "i915_utils.h"
+
+#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+static void internal_free_pages(struct sg_table *st)
+{
+ struct scatterlist *sg;
+
+ for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+ if (sg_page(sg))
+ __free_pages(sg_page(sg), get_order(sg->length));
+ }
+
+ sg_free_table(st);
+ kfree(st);
+}
+
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ unsigned int npages;
+ int max_order;
+ gfp_t gfp;
+
+ max_order = MAX_ORDER;
+#ifdef CONFIG_SWIOTLB
+ if (swiotlb_nr_tbl()) {
+ unsigned int max_segment;
+
+ max_segment = swiotlb_max_segment();
+ if (max_segment) {
+ max_segment = max_t(unsigned int, max_segment,
+ PAGE_SIZE) >> PAGE_SHIFT;
+ max_order = min(max_order, ilog2(max_segment));
+ }
+ }
+#endif
+
+ gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+ if (IS_I965GM(i915) || IS_I965G(i915)) {
+ /* 965gm cannot relocate objects above 4GiB. */
+ gfp &= ~__GFP_HIGHMEM;
+ gfp |= __GFP_DMA32;
+ }
+
+create_st:
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ npages = obj->base.size / PAGE_SIZE;
+ if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+
+ do {
+ int order = min(fls(npages) - 1, max_order);
+ struct page *page;
+
+ do {
+ page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
+ order);
+ if (page)
+ break;
+ if (!order--)
+ goto err;
+
+ /* Limit subsequent allocations as well */
+ max_order = order;
+ } while (1);
+
+ sg_set_page(sg, page, PAGE_SIZE << order, 0);
+ sg_page_sizes |= PAGE_SIZE << order;
+ st->nents++;
+
+ npages -= 1 << order;
+ if (!npages) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = __sg_next(sg);
+ } while (1);
+
+ if (i915_gem_gtt_prepare_pages(obj, st)) {
+ /* Failed to dma-map, try again with single page sg segments */
+ if (get_order(st->sgl->length)) {
+ internal_free_pages(st);
+ max_order = 0;
+ goto create_st;
+ }
+ goto err;
+ }
+
+ /* Mark the pages as dontneed whilst they are still pinned. As soon
+ * as they are unpinned they are allowed to be reaped by the shrinker,
+ * and the caller is expected to repopulate - the contents of this
+ * object are only valid whilst active and pinned.
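+ * e.g. an object backing a ring survives while pinned, but once
+ * unpinned the shrinker may reap its pages at any time and the next
+ * user must treat the contents as undefined.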
+ */ + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + internal_free_pages(st); + + return -ENOMEM; +} + +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + internal_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_internal, + .put_pages = i915_gem_object_put_pages_internal, +}; + +/** + * i915_gem_object_create_internal: create an object with volatile pages + * @i915: the i915 device + * @size: the size in bytes of backing storage to allocate for the object + * + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. + */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + phys_addr_t size) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_object_internal_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + + cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; + i915_gem_object_set_cache_coherency(obj, cache_level); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 4ed28ac9ab3a..457e694a5c3f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -23,8 +23,9 @@ */ #include "i915_drv.h" -#include "i915_gem_object.h" #include "i915_gem_clflush.h" +#include "i915_gem_context.h" +#include "i915_gem_object.h" #include "i915_globals.h" #include "intel_frontbuffer.h" @@ -442,3 +443,10 @@ int __init i915_global_objects_init(void) i915_global_register(&global.base); return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" +#include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c new file mode 100644 index 000000000000..ad662e558dfb --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -0,0 +1,251 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt_pm.h" + +#include "i915_drv.h" +#include "i915_globals.h" + +static void i915_gem_park(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + i915_gem_batch_pool_fini(&engine->batch_pool); + + i915_timelines_park(i915); + i915_vma_parked(i915); + + i915_globals_park(); +} + +static void idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.idle_work); + bool restart = true; + + cancel_delayed_work(&i915->gem.retire_work); + mutex_lock(&i915->drm.struct_mutex); + + intel_wakeref_lock(&i915->gt.wakeref); + if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) { + i915_gem_park(i915); + restart = false; + } + intel_wakeref_unlock(&i915->gt.wakeref); + + mutex_unlock(&i915->drm.struct_mutex); + if (restart) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +static void retire_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.retire_work.work); + + /* Come back later if the device is busy... 
 */
+	if (mutex_trylock(&i915->drm.struct_mutex)) {
+		i915_retire_requests(i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+static int pm_notifier(struct notifier_block *nb,
+		       unsigned long action,
+		       void *data)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, typeof(*i915), gem.pm_notifier);
+
+	switch (action) {
+	case INTEL_GT_UNPARK:
+		i915_globals_unpark();
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+		break;
+
+	case INTEL_GT_PARK:
+		queue_work(i915->wq, &i915->gem.idle_work);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = true;
+
+	do {
+		if (i915_gem_wait_for_idle(i915,
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_FOR_IDLE_BOOST,
+					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+			/* XXX hide warning from gem_eio */
+			if (i915_modparams.reset) {
+				dev_err(i915->drm.dev,
+					"Failed to idle engines, declaring wedged!\n");
+				GEM_TRACE_DUMP();
+			}
+
+			/*
+			 * Forcibly cancel outstanding work and leave
+			 * the gpu quiet.
+			 */
+			i915_gem_set_wedged(i915);
+			result = false;
+		}
+	} while (i915_retire_requests(i915) && result);
+
+	GEM_BUG_ON(i915->gt.awake);
+	return result;
+}
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915)
+{
+	return switch_to_kernel_context_sync(i915);
+}
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	intel_wakeref_auto(&i915->mm.userfault_wakeref, 0);
+	flush_workqueue(i915->wq);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/*
+	 * We have to flush all the executing contexts to main memory so
+	 * that they can be saved in the hibernation image. To ensure the
+	 * last context image is coherent, we have to switch away from it.
+	 * That leaves the i915->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	switch_to_kernel_context_sync(i915);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+	flush_work(&i915->gem.idle_work);
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	i915_gem_drain_freed_objects(i915);
+
+	intel_uc_suspend(i915);
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj;
+	struct list_head *phases[] = {
+		&i915->mm.unbound_list,
+		&i915->mm.bound_list,
+		NULL
+	}, **phase;
+
+	/*
+	 * Neither the BIOS, ourselves nor any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that it is of the utmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that resetting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+ */
+
+	mutex_lock(&i915->drm.struct_mutex);
+	for (phase = phases; *phase; phase++) {
+		list_for_each_entry(obj, *phase, mm.link)
+			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	intel_uc_sanitize(i915);
+	i915_gem_sanitize(i915);
+}
+
+void i915_gem_resume(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	WARN_ON(i915->gt.awake);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
+	i915_gem_restore_gtt_mappings(i915);
+	i915_gem_restore_fences(i915);
+
+	/*
+	 * As we didn't flush the kernel context before suspend, we cannot
+	 * guarantee that the context image is complete. So let's just reset
+	 * it and start again.
+	 */
+	intel_gt_resume(i915);
+
+	if (i915_gem_init_hw(i915))
+		goto err_wedged;
+
+	intel_uc_resume(i915);
+
+	/* Always reload a context for powersaving. */
+	if (!i915_gem_load_power_context(i915))
+		goto err_wedged;
+
+out_unlock:
+	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return;
+
+err_wedged:
+	if (!i915_reset_failed(i915)) {
+		dev_err(i915->drm.dev,
+			"Failed to re-initialize GPU, declaring it wedged!\n");
+		i915_gem_set_wedged(i915);
+	}
+	goto out_unlock;
+}
+
+void i915_gem_init__pm(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+
+	i915->gem.pm_notifier.notifier_call = pm_notifier;
+	blocking_notifier_chain_register(&i915->gt.pm_notifications,
+					 &i915->gem.pm_notifier);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
new file mode 100644
index 000000000000..6f7d5d11ac3b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_PM_H__
+#define __I915_GEM_PM_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct work_struct;
+
+void i915_gem_init__pm(struct drm_i915_private *i915);
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915);
+void i915_gem_resume(struct drm_i915_private *i915);
+
+void i915_gem_idle_work_handler(struct work_struct *work);
+
+void i915_gem_suspend(struct drm_i915_private *i915);
+void i915_gem_suspend_late(struct drm_i915_private *i915);
+
+#endif /* __I915_GEM_PM_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
new file mode 100644
index 000000000000..cd42299f019a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -0,0 +1,555 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2015 Intel Corporation
+ */
+
+#include <linux/oom.h>
+#include <linux/sched/mm.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/vmalloc.h>
+#include <drm/i915_drm.h>
+
+#include "i915_trace.h"
+
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
+{
+	struct mutex *m = &i915->drm.struct_mutex;
+
+	switch (mutex_trylock_recursive(m)) {
+	case MUTEX_TRYLOCK_RECURSIVE:
+		*unlock = false;
+		return true;
+
+	case MUTEX_TRYLOCK_FAILED:
+		*unlock = false;
+		if (flags & I915_SHRINK_ACTIVE &&
+		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
+			*unlock = true;
+		return *unlock;
+
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
+	}
+
+	BUG();
+}
+
+static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
+{
+	if (!unlock)
+		return;
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static bool
swap_available(void)
+{
+	return get_nr_swap_pages() > 0;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+	/* Consider only shrinkable objects. */
+	if (!i915_gem_object_is_shrinkable(obj))
+		return false;
+
+	/* Only report true if by unbinding the object and putting its pages
+	 * we can actually make forward progress towards freeing physical
+	 * pages.
+	 *
+	 * If the pages are pinned for any other reason than being bound
+	 * to the GPU, simply unbinding from the GPU is not going to succeed
+	 * in releasing our pin count on the pages themselves.
+	 */
+	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
+		return false;
+
+	/* If any vma are "permanently" pinned, it will prevent us from
+	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
+	 * a permanent pin, along with a few others like the context objects.
+	 * To simplify the scan, and to avoid walking the list of vma under the
+	 * object, we just check the count of its permanently pinned vma.
+	 */
+	if (READ_ONCE(obj->pin_global))
+		return false;
+
+	/* We can only return physical pages to the system if we can either
+	 * discard the contents (because the user has marked them as being
+	 * purgeable) or if we can move their contents out to swap.
+	 */
+	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
+}
+
+static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
+{
+	if (i915_gem_object_unbind(obj) == 0)
+		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	return !i915_gem_object_has_pages(obj);
+}
+
+static void try_to_writeback(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+		/* fall through */
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (flags & I915_SHRINK_WRITEBACK)
+		i915_gem_object_writeback(obj);
+}
+
+/**
+ * i915_gem_shrink - Shrink buffer object caches
+ * @i915: i915 device
+ * @target: amount of memory to make available, in pages
+ * @nr_scanned: optional output for number of pages scanned (incremental)
+ * @flags: control flags for selecting cache types
+ *
+ * This function is the main interface to the shrinker. It will try to release
+ * up to @target pages of main memory backing storage from buffer objects.
+ * Selection of the specific caches can be done with @flags. This is e.g. useful
+ * when purgeable objects should be removed from caches preferentially.
+ *
+ * Note that it's not guaranteed that the released amount is actually available
+ * as free system memory - the pages might still be in use due to other reasons
+ * (like cpu mmaps) or the mm core has reused them before we could grab them.
+ * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
+ * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
+ *
+ * Also note that any kind of pinning (both per-vma address space pins and
+ * backing storage pins at the buffer object level) result in the shrinker code
+ * having to skip the object.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
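+ *
+ * A purely illustrative call, sketched by the editor (the page target of
+ * 128 is made up; the flags are the existing I915_SHRINK_* bits used
+ * elsewhere in this file):
+ *
+ *	freed = i915_gem_shrink(i915, 128, NULL,
+ *				I915_SHRINK_BOUND |
+ *				I915_SHRINK_UNBOUND |
+ *				I915_SHRINK_PURGEABLE);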
+ */
+unsigned long
+i915_gem_shrink(struct drm_i915_private *i915,
+		unsigned long target,
+		unsigned long *nr_scanned,
+		unsigned flags)
+{
+	const struct {
+		struct list_head *list;
+		unsigned int bit;
+	} phases[] = {
+		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
+		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
+		{ NULL, 0 },
+	}, *phase;
+	intel_wakeref_t wakeref = 0;
+	unsigned long count = 0;
+	unsigned long scanned = 0;
+	bool unlock;
+
+	if (!shrinker_lock(i915, flags, &unlock))
+		return 0;
+
+	/*
+	 * When shrinking the active list, also consider active contexts.
+	 * Active contexts are pinned until they are retired, and so can
+	 * not be simply unbound to retire and unpin their pages. To shrink
+	 * the contexts, we must wait until the gpu is idle.
+	 *
+	 * We don't care about errors here; if we cannot wait upon the GPU,
+	 * we will free as much as we can and hope to get a second chance.
+	 */
+	if (flags & I915_SHRINK_ACTIVE)
+		i915_gem_wait_for_idle(i915,
+				       I915_WAIT_LOCKED,
+				       MAX_SCHEDULE_TIMEOUT);
+
+	trace_i915_gem_shrink(i915, target, flags);
+	i915_retire_requests(i915);
+
+	/*
+	 * Unbinding of objects will require HW access; Let us not wake the
+	 * device just to recover a little memory. If absolutely necessary,
+	 * we will force the wake during oom-notifier.
+	 */
+	if (flags & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		if (!wakeref)
+			flags &= ~I915_SHRINK_BOUND;
+	}
+
+	/*
+	 * As we may completely rewrite the (un)bound list whilst unbinding
+	 * (due to retiring requests) we have to strictly process only
+	 * one element of the list at a time, and recheck the list
+	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equal to 0.
+	 */
+	for (phase = phases; phase->list; phase++) {
+		struct list_head still_in_list;
+		struct drm_i915_gem_object *obj;
+
+		if ((flags & phase->bit) == 0)
+			continue;
+
+		INIT_LIST_HEAD(&still_in_list);
+
+		/*
+		 * We serialize our access to unreferenced objects through
+		 * the use of the struct_mutex. While the objects are not
+		 * yet freed (due to RCU then a workqueue) we still want
+		 * to be able to shrink their pages, so they remain on
+		 * the unbound/bound list until actually freed.
+ */
+		spin_lock(&i915->mm.obj_lock);
+		while (count < target &&
+		       (obj = list_first_entry_or_null(phase->list,
+						       typeof(*obj),
+						       mm.link))) {
+			list_move_tail(&obj->mm.link, &still_in_list);
+
+			if (flags & I915_SHRINK_PURGEABLE &&
+			    obj->mm.madv != I915_MADV_DONTNEED)
+				continue;
+
+			if (flags & I915_SHRINK_VMAPS &&
+			    !is_vmalloc_addr(obj->mm.mapping))
+				continue;
+
+			if (!(flags & I915_SHRINK_ACTIVE) &&
+			    (i915_gem_object_is_active(obj) ||
+			     i915_gem_object_is_framebuffer(obj)))
+				continue;
+
+			if (!can_release_pages(obj))
+				continue;
+
+			spin_unlock(&i915->mm.obj_lock);
+
+			if (unsafe_drop_pages(obj)) {
+				/* May arrive from get_pages on another bo */
+				mutex_lock_nested(&obj->mm.lock,
+						  I915_MM_SHRINKER);
+				if (!i915_gem_object_has_pages(obj)) {
+					try_to_writeback(obj, flags);
+					count += obj->base.size >> PAGE_SHIFT;
+				}
+				mutex_unlock(&obj->mm.lock);
+			}
+			scanned += obj->base.size >> PAGE_SHIFT;
+
+			spin_lock(&i915->mm.obj_lock);
+		}
+		list_splice_tail(&still_in_list, phase->list);
+		spin_unlock(&i915->mm.obj_lock);
+	}
+
+	if (flags & I915_SHRINK_BOUND)
+		intel_runtime_pm_put(i915, wakeref);
+
+	i915_retire_requests(i915);
+
+	shrinker_unlock(i915, unlock);
+
+	if (nr_scanned)
+		*nr_scanned += scanned;
+	return count;
+}
+
+/**
+ * i915_gem_shrink_all - Shrink buffer object caches completely
+ * @i915: i915 device
+ *
+ * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
+ * caches completely. It also first waits for and retires all outstanding
+ * requests to also be able to release backing storage for active objects.
+ *
+ * This should only be used in code to intentionally quiesce the gpu or as a
+ * last-ditch effort when memory seems to have run out.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+	unsigned long freed = 0;
+
+	with_intel_runtime_pm(i915, wakeref) {
+		freed = i915_gem_shrink(i915, -1UL, NULL,
+					I915_SHRINK_BOUND |
+					I915_SHRINK_UNBOUND |
+					I915_SHRINK_ACTIVE);
+	}
+
+	return freed;
+}
+
+static unsigned long
+i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct drm_i915_private *i915 =
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
+	struct drm_i915_gem_object *obj;
+	unsigned long num_objects = 0;
+	unsigned long count = 0;
+
+	spin_lock(&i915->mm.obj_lock);
+	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
+		if (can_release_pages(obj)) {
+			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
+
+	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
+		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
+			count += obj->base.size >> PAGE_SHIFT;
+			num_objects++;
+		}
+	spin_unlock(&i915->mm.obj_lock);
+
+	/* Update our preferred vmscan batch size for the next pass.
+	 * Our rough guess for an effective batch size is roughly 2
+	 * available GEM objects worth of pages. That is we don't want
+	 * the shrinker to fire, until it is worth the cost of freeing an
+	 * entire GEM object.
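+	 *
+	 * Worked example (numbers purely illustrative, added by the
+	 * editor): with 512 releasable pages spread over 2 objects,
+	 * avg = 2 * 512 / 2 = 512, so the batch below becomes
+	 * (old batch + 512) / 2, floored at the default SHRINK_BATCH
+	 * of 128.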
+ */ + if (num_objects) { + unsigned long avg = 2 * count / num_objects; + + i915->mm.shrinker.batch = + max((i915->mm.shrinker.batch + avg) >> 1, + 128ul /* default SHRINK_BATCH */); + } + + return count; +} + +static unsigned long +i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) +{ + struct drm_i915_private *i915 = + container_of(shrinker, struct drm_i915_private, mm.shrinker); + unsigned long freed; + bool unlock; + + sc->nr_scanned = 0; + + if (!shrinker_lock(i915, 0, &unlock)) + return SHRINK_STOP; + + freed = i915_gem_shrink(i915, + sc->nr_to_scan, + &sc->nr_scanned, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); + if (sc->nr_scanned < sc->nr_to_scan) + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { + intel_wakeref_t wakeref; + + with_intel_runtime_pm(i915, wakeref) { + freed += i915_gem_shrink(i915, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, + I915_SHRINK_ACTIVE | + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + } + } + + shrinker_unlock(i915, unlock); + + return sc->nr_scanned ? freed : SHRINK_STOP; +} + +static int +i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct drm_i915_private *i915 = + container_of(nb, struct drm_i915_private, mm.oom_notifier); + struct drm_i915_gem_object *obj; + unsigned long unevictable, bound, unbound, freed_pages; + intel_wakeref_t wakeref; + + freed_pages = 0; + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); + + /* Because we may be allocating inside our own driver, we cannot + * assert that there are no objects with pinned pages that are not + * being pointed to by hardware. 
+ */ + unbound = bound = unevictable = 0; + spin_lock(&i915->mm.obj_lock); + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { + if (!can_release_pages(obj)) + unevictable += obj->base.size >> PAGE_SHIFT; + else + unbound += obj->base.size >> PAGE_SHIFT; + } + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { + if (!can_release_pages(obj)) + unevictable += obj->base.size >> PAGE_SHIFT; + else + bound += obj->base.size >> PAGE_SHIFT; + } + spin_unlock(&i915->mm.obj_lock); + + if (freed_pages || unbound || bound) + pr_info("Purging GPU memory, %lu pages freed, " + "%lu pages still pinned.\n", + freed_pages, unevictable); + + *(unsigned long *)ptr += freed_pages; + return NOTIFY_DONE; +} + +static int +i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct drm_i915_private *i915 = + container_of(nb, struct drm_i915_private, mm.vmap_notifier); + struct i915_vma *vma, *next; + unsigned long freed_pages = 0; + intel_wakeref_t wakeref; + bool unlock; + + if (!shrinker_lock(i915, 0, &unlock)) + return NOTIFY_DONE; + + /* Force everything onto the inactive lists */ + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT)) + goto out; + + with_intel_runtime_pm(i915, wakeref) + freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_VMAPS); + + /* We also want to clear any cached iomaps as they wrap vmap */ + mutex_lock(&i915->ggtt.vm.mutex); + list_for_each_entry_safe(vma, next, + &i915->ggtt.vm.bound_list, vm_link) { + unsigned long count = vma->node.size >> PAGE_SHIFT; + + if (!vma->iomap || i915_vma_is_active(vma)) + continue; + + mutex_unlock(&i915->ggtt.vm.mutex); + if (i915_vma_unbind(vma) == 0) + freed_pages += count; + mutex_lock(&i915->ggtt.vm.mutex); + } + mutex_unlock(&i915->ggtt.vm.mutex); + +out: + shrinker_unlock(i915, unlock); + + *(unsigned long *)ptr += freed_pages; + return NOTIFY_DONE; +} + +/** + * i915_gem_shrinker_register - Register the i915 shrinker + * @i915: i915 device + * + * This function registers and sets up the i915 shrinker and OOM handler. + */ +void i915_gem_shrinker_register(struct drm_i915_private *i915) +{ + i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; + i915->mm.shrinker.count_objects = i915_gem_shrinker_count; + i915->mm.shrinker.seeks = DEFAULT_SEEKS; + i915->mm.shrinker.batch = 4096; + WARN_ON(register_shrinker(&i915->mm.shrinker)); + + i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; + WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); + + i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; + WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); +} + +/** + * i915_gem_shrinker_unregister - Unregisters the i915 shrinker + * @i915: i915 device + * + * This function unregisters the i915 shrinker and OOM handler. 
+ */
+void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
+{
+	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
+	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
+	unregister_shrinker(&i915->mm.shrinker);
+}
+
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex)
+{
+	bool unlock = false;
+
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
+		mutex_acquire(&i915->drm.struct_mutex.dep_map,
+			      I915_MM_NORMAL, 0, _RET_IP_);
+		unlock = true;
+	}
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	/*
+	 * As we invariably rely on the struct_mutex within the shrinker,
+	 * but have a complicated recursion dance, taint all the mutexes used
+	 * within the shrinker with the struct_mutex. For completeness, we
+	 * taint with all subclasses of struct_mutex, even though we should
+	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
+	 * deadlocks from using struct_mutex inside @mutex.
+	 */
+	mutex_acquire(&i915->drm.struct_mutex.dep_map,
+		      I915_MM_SHRINKER, 0, _RET_IP_);
+
+	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, 0, _RET_IP_);
+
+	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+
+	fs_reclaim_release(GFP_KERNEL);
+
+	if (unlock)
+		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
new file mode 100644
index 000000000000..9080a736663a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -0,0 +1,704 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2012 Intel Corporation
+ */
+
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+
+/*
+ * The BIOS typically reserves some of the system's memory for the exclusive
+ * use of the integrated graphics. This memory is no longer available for
+ * use by the OS and so the user finds that his system has less memory
+ * available than he put in. We refer to this memory as stolen.
+ *
+ * The BIOS will allocate its framebuffer from the stolen memory. Our
+ * goal is to try to reuse that object for our own fbcon which must always
+ * be available for panics. Anything else we can reuse the stolen memory
+ * for is a boon.
+ */
+
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+					 struct drm_mm_node *node, u64 size,
+					 unsigned alignment, u64 start, u64 end)
+{
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return -ENODEV;
+
+	/* WaSkipStolenMemoryFirstPage:bdw+ */
+	if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+		start = 4096;
+
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+					  size, alignment, 0,
+					  start, end, DRM_MM_INSERT_BEST);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+
+	return ret;
+}
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+				struct drm_mm_node *node, u64 size,
+				unsigned alignment)
+{
+	return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
+						    alignment, 0, U64_MAX);
+}
+
+void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+				 struct drm_mm_node *node)
+{
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	drm_mm_remove_node(node);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+}
+
+static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
+			      struct resource *dsm)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct resource *r;
+
+	if (dsm->start == 0 || dsm->end <= dsm->start)
+		return -EINVAL;
+
+	/*
+	 * TODO: We have yet to encounter the case where the GTT wasn't at the
+	 * end of stolen. With that assumption we could simplify this.
+	 */
+
+	/* Make sure we don't clobber the GTT if it's within stolen memory */
+	if (INTEL_GEN(dev_priv) <= 4 &&
+	    !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
+		struct resource stolen[2] = {*dsm, *dsm};
+		struct resource ggtt_res;
+		resource_size_t ggtt_start;
+
+		ggtt_start = I915_READ(PGTBL_CTL);
+		if (IS_GEN(dev_priv, 4))
+			ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
+				     (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
+		else
+			ggtt_start &= PGTBL_ADDRESS_LO_MASK;
+
+		ggtt_res =
+			(struct resource) DEFINE_RES_MEM(ggtt_start,
+							 ggtt_total_entries(ggtt) * 4);
+
+		if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end)
+			stolen[0].end = ggtt_res.start;
+		if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end)
+			stolen[1].start = ggtt_res.end;
+
+		/* Pick the larger of the two chunks */
+		if (resource_size(&stolen[0]) > resource_size(&stolen[1]))
+			*dsm = stolen[0];
+		else
+			*dsm = stolen[1];
+
+		if (stolen[0].start != stolen[1].start ||
+		    stolen[0].end != stolen[1].end) {
+			DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
+			DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
+		}
+	}
+
+	/*
+	 * Verify that nothing else uses this physical address. Stolen
+	 * memory should be reserved by the BIOS and hidden from the
+	 * kernel. So if the region is already marked as busy, something
+	 * is seriously wrong.
+	 */
+	r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
+				    resource_size(dsm),
+				    "Graphics Stolen Memory");
+	if (r == NULL) {
+		/*
+		 * One more attempt but this time requesting region from
+		 * start + 1, as we have seen that this resolves the region
+		 * conflict with the PCI Bus.
+		 * This is a BIOS w/a: Some BIOS wrap stolen in the root
+		 * PCI bus, but have an off-by-one error. Hence retry the
+		 * reservation starting from 1 instead of 0.
+		 * There's also BIOS with off-by-one on the other end.
+		 */
+		r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
+					    resource_size(dsm) - 2,
+					    "Graphics Stolen Memory");
+		/*
+		 * GEN3 firmware likes to smash pci bridges into the stolen
+		 * range. Apparently this works.
+ */ + if (r == NULL && !IS_GEN(dev_priv, 3)) { + DRM_ERROR("conflict detected with stolen region: %pR\n", + dsm); + + return -EBUSY; + } + } + + return 0; +} + +void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +{ + if (!drm_mm_initialized(&dev_priv->mm.stolen)) + return; + + drm_mm_takedown(&dev_priv->mm.stolen); +} + +static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(IS_GM45(dev_priv) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", + IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) + return; + + /* + * Whether ILK really reuses the ELK register for this is unclear. + * Let's see if we catch anyone with this supposedly enabled on ILK. + */ + WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", + reg_val); + + if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) + return; + + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; + WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); + + *size = stolen_top - *base; +} + +static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { + case GEN6_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN6_STOLEN_RESERVED_512K: + *size = 512 * 1024; + break; + case GEN6_STOLEN_RESERVED_256K: + *size = 256 * 1024; + break; + case GEN6_STOLEN_RESERVED_128K: + *size = 128 * 1024; + break; + default: + *size = 1024 * 1024; + MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + default: + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + /* fall through */ + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + } + + /* + * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the + * reserved location as (top - size). 
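+	 *
+	 * Worked example (addresses purely illustrative, added by the
+	 * editor): with stolen_top at 0x80000000 and a 1MiB reservation,
+	 * the reserved base is deduced to be 0x7ff00000.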
+ */ + *base = stolen_top - *size; +} + +static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN7_STOLEN_RESERVED_256K: + *size = 256 * 1024; + break; + default: + *size = 1024 * 1024; + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { + case GEN8_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_2M: + *size = 2 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_4M: + *size = 4 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_8M: + *size = 8 * 1024 * 1024; + break; + default: + *size = 8 * 1024 * 1024; + MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); + } +} + +static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + + if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) + return; + + *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; + *size = stolen_top - *base; +} + +static void icl_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u64 reg_val = I915_READ64(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); + + *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; + + switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { + case GEN8_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_2M: + *size = 2 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_4M: + *size = 4 * 1024 * 1024; + break; + case GEN8_STOLEN_RESERVED_8M: + *size = 8 * 1024 * 1024; + break; + default: + *size = 8 * 1024 * 1024; + MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); + } +} + +int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +{ + resource_size_t reserved_base, stolen_top; + resource_size_t reserved_total, reserved_size; + + mutex_init(&dev_priv->mm.stolen_lock); + + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + + if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { + DRM_INFO("DMAR active, disabling use of stolen memory\n"); + return 0; + } + + if (resource_size(&intel_graphics_stolen_res) == 0) + return 0; + + dev_priv->dsm = intel_graphics_stolen_res; + + if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) + return 0; + + GEM_BUG_ON(dev_priv->dsm.start == 0); + GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + + stolen_top = dev_priv->dsm.end + 1; + reserved_base = stolen_top; + reserved_size = 0; + + switch (INTEL_GEN(dev_priv)) { + 
case 2: + case 3: + break; + case 4: + if (!IS_G4X(dev_priv)) + break; + /* fall through */ + case 5: + g4x_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 6: + gen6_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 7: + if (IS_VALLEYVIEW(dev_priv)) + vlv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + gen7_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 8: + case 9: + case 10: + if (IS_LP(dev_priv)) + chv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + bdw_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + break; + case 11: + default: + icl_get_stolen_reserved(dev_priv, &reserved_base, + &reserved_size); + break; + } + + /* + * Our expectation is that the reserved space is at the top of the + * stolen region and *never* at the bottom. If we see !reserved_base, + * it likely means we failed to read the registers correctly. + */ + if (!reserved_base) { + DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", + &reserved_base, &reserved_size); + reserved_base = stolen_top; + reserved_size = 0; + } + + dev_priv->dsm_reserved = + (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + + if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", + &dev_priv->dsm_reserved, &dev_priv->dsm); + return 0; + } + + /* It is possible for the reserved area to end before the end of stolen + * memory, so just consider the start. */ + reserved_total = stolen_top - reserved_base; + + DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); + + dev_priv->stolen_usable_size = + resource_size(&dev_priv->dsm) - reserved_total; + + /* Basic memrange allocator for stolen space. */ + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); + + return 0; +} + +static struct sg_table * +i915_pages_create_for_stolen(struct drm_device *dev, + resource_size_t offset, resource_size_t size) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct sg_table *st; + struct scatterlist *sg; + + GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); + + /* We hide that we have no struct page backing our stolen object + * by wrapping the contiguous physical allocation with a fake + * dma mapping in a single scatterlist. 
+ */ + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (st == NULL) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(st, 1, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + sg->offset = 0; + sg->length = size; + + sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; + sg_dma_len(sg) = size; + + return st; +} + +static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +{ + struct sg_table *pages = + i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages, obj->stolen->size); + + return 0; +} + +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + /* Should only be called from i915_gem_object_release_stolen() */ + sg_free_table(pages); + kfree(pages); +} + +static void +i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); + + GEM_BUG_ON(!stolen); + + __i915_gem_object_unpin_pages(obj); + + i915_gem_stolen_remove_node(dev_priv, stolen); + kfree(stolen); +} + +static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { + .get_pages = i915_gem_object_get_pages_stolen, + .put_pages = i915_gem_object_put_pages_stolen, + .release = i915_gem_object_release_stolen, +}; + +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + struct drm_mm_node *stolen) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + obj = i915_gem_object_alloc(); + if (obj == NULL) + return NULL; + + drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); + i915_gem_object_init(obj, &i915_gem_object_stolen_ops); + + obj->stolen = stolen; + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + cache_level = HAS_LLC(dev_priv) ? 
I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	if (i915_gem_object_pin_pages(obj))
+		goto cleanup;
+
+	return obj;
+
+cleanup:
+	i915_gem_object_free(obj);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+			      resource_size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	if (size == 0)
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
+	if (ret) {
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj)
+		return obj;
+
+	i915_gem_stolen_remove_node(dev_priv, stolen);
+	kfree(stolen);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+					       resource_size_t stolen_offset,
+					       resource_size_t gtt_offset,
+					       resource_size_t size)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+			 &stolen_offset, &gtt_offset, &size);
+
+	/* KISS and expect everything to be page-aligned */
+	if (WARN_ON(size == 0) ||
+	    WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
+	    WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	stolen->start = stolen_offset;
+	stolen->size = size;
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj == NULL) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
+		i915_gem_stolen_remove_node(dev_priv, stolen);
+		kfree(stolen);
+		return NULL;
+	}
+
+	/* Some objects just need physical mem from stolen space */
+	if (gtt_offset == I915_GTT_OFFSET_NONE)
+		return obj;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_pages;
+	}
+
+	/* To simplify the initialisation sequence between KMS and GTT,
+	 * we allow construction of the stolen object prior to
+	 * setting up the GTT space. The actual reservation will occur
+	 * later.
+ */
+	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
+				   size, gtt_offset, obj->cache_level,
+				   0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		goto err_pages;
+	}
+
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+	vma->pages = obj->mm.pages;
+	vma->flags |= I915_VMA_GLOBAL_BIND;
+	__i915_vma_set_map_and_fenceable(vma);
+
+	mutex_lock(&ggtt->vm.mutex);
+	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	mutex_unlock(&ggtt->vm.mutex);
+
+	spin_lock(&dev_priv->mm.obj_lock);
+	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
+	obj->bind_count++;
+	spin_unlock(&dev_priv->mm.obj_lock);
+
+	return obj;
+
+err_pages:
+	i915_gem_object_unpin_pages(obj);
+err:
+	i915_gem_object_put(obj);
+	return NULL;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
new file mode 100644
index 000000000000..ca0c2f451742
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -0,0 +1,440 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/**
+ * DOC: buffer object tiling
+ *
+ * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the userspace
+ * interface to declare fence register requirements.
+ *
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There are
+ * two exceptions:
+ *
+ * - For X and Y tiling the hardware provides detilers for CPU access, so called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up
+ *   and hence needs to know the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
+ *
+ * Since neither of these applies to new tiling layouts on modern platforms,
+ * like W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y
+ * tiled. Anything else can be handled in userspace entirely without the
+ * kernel's involvement.
+ */
+
+/**
+ * i915_gem_fence_size - required global GTT size for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT size for a fence (view of a tiled object),
+ * taking into account potential fence register mapping.
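+ *
+ * Rough worked example (numbers illustrative, added by the editor): on
+ * gen3 a 700KiB X-tiled object needs its fence region rounded up to the
+ * next power of two, i.e. 1MiB, whereas on gen4+ the size is merely
+ * rounded up to a multiple of one full tile row (stride * tile height).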
+ */
+u32 i915_gem_fence_size(struct drm_i915_private *i915,
+			u32 size, unsigned int tiling, unsigned int stride)
+{
+	u32 ggtt_size;
+
+	GEM_BUG_ON(!size);
+
+	if (tiling == I915_TILING_NONE)
+		return size;
+
+	GEM_BUG_ON(!stride);
+
+	if (INTEL_GEN(i915) >= 4) {
+		stride *= i915_gem_tile_height(tiling);
+		GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
+		return roundup(size, stride);
+	}
+
+	/* Previous chips need a power-of-two fence region when tiling */
+	if (IS_GEN(i915, 3))
+		ggtt_size = 1024*1024;
+	else
+		ggtt_size = 512*1024;
+
+	while (ggtt_size < size)
+		ggtt_size <<= 1;
+
+	return ggtt_size;
+}
+
+/**
+ * i915_gem_fence_alignment - required global GTT alignment for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT alignment for a fence (a view of a tiled
+ * object), taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
+			     unsigned int tiling, unsigned int stride)
+{
+	GEM_BUG_ON(!size);
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (tiling == I915_TILING_NONE)
+		return I915_GTT_MIN_ALIGNMENT;
+
+	if (INTEL_GEN(i915) >= 4)
+		return I965_FENCE_PAGE;
+
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	return i915_gem_fence_size(i915, size, tiling, stride);
+}
+
+/* Check pitch constraints for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_i915_gem_object *obj,
+	       unsigned int tiling, unsigned int stride)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tile_width;
+
+	/* Linear is always fine */
+	if (tiling == I915_TILING_NONE)
+		return true;
+
+	if (tiling > I915_TILING_LAST)
+		return false;
+
+	/* check maximum stride & object size */
+	/* i965+ stores the end address of the gtt mapping in the fence
+	 * reg, so don't bother to check the size */
+	if (INTEL_GEN(i915) >= 7) {
+		if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else if (INTEL_GEN(i915) >= 4) {
+		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else {
+		if (stride > 8192)
+			return false;
+
+		if (!is_power_of_2(stride))
+			return false;
+	}
+
+	if (IS_GEN(i915, 2) ||
+	    (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
+		tile_width = 128;
+	else
+		tile_width = 512;
+
+	if (!stride || !IS_ALIGNED(stride, tile_width))
+		return false;
+
+	return true;
+}
+
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+				   int tiling_mode, unsigned int stride)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	u32 size, alignment;
+
+	if (!i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
+	if (vma->node.size < size)
+		return false;
+
+	alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
+	if (!IS_ALIGNED(vma->node.start, alignment))
+		return false;
+
+	return true;
+}
+
+/* Make the current GTT allocation valid for the change in tiling.
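+ * If a bound vma no longer satisfies the fence size/alignment required
+ * for the new (tiling, stride), it is unbound here so that it can later
+ * be rebound into a suitable slot.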
*/ +static int +i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, + int tiling_mode, unsigned int stride) +{ + struct i915_vma *vma; + int ret; + + if (tiling_mode == I915_TILING_NONE) + return 0; + + for_each_ggtt_vma(vma, obj) { + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + +int +i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, + unsigned int tiling, unsigned int stride) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_vma *vma; + int err; + + /* Make sure we don't cross-contaminate obj->tiling_and_stride */ + BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); + + GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); + GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); + lockdep_assert_held(&i915->drm.struct_mutex); + + if ((tiling | stride) == obj->tiling_and_stride) + return 0; + + if (i915_gem_object_is_framebuffer(obj)) + return -EBUSY; + + /* We need to rebind the object if its current allocation + * no longer meets the alignment restrictions for its new + * tiling mode. Otherwise we can just leave it alone, but + * need to ensure that any fence register is updated before + * the next fenced (either through the GTT or by the BLT unit + * on older GPUs) access. + * + * After updating the tiling parameters, we then flag whether + * we need to update an associated fence register. Note this + * has to also include the unfenced register the GPU uses + * whilst executing a fenced command for an untiled object. + */ + + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) + return err; + + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + + /* If the memory has unknown (i.e. varying) swizzling, we pin the + * pages to prevent them being swapped out and causing corruption + * due to the change in swizzling. + */ + mutex_lock(&obj->mm.lock); + if (i915_gem_object_has_pages(obj) && + obj->mm.madv == I915_MADV_WILLNEED && + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + if (tiling == I915_TILING_NONE) { + GEM_BUG_ON(!obj->mm.quirked); + __i915_gem_object_unpin_pages(obj); + obj->mm.quirked = false; + } + if (!i915_gem_object_is_tiled(obj)) { + GEM_BUG_ON(obj->mm.quirked); + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } + } + mutex_unlock(&obj->mm.lock); + + for_each_ggtt_vma(vma, obj) { + vma->fence_size = + i915_gem_fence_size(i915, vma->size, tiling, stride); + vma->fence_alignment = + i915_gem_fence_alignment(i915, + vma->size, tiling, stride); + + if (vma->fence) + vma->fence->dirty = true; + } + + obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); + + /* Force the fence to be reacquired for GTT access */ + i915_gem_object_release_mmap(obj); + + /* Try to preallocate memory required to save swizzling on put-pages */ + if (i915_gem_object_needs_bit17_swizzle(obj)) { + if (!obj->bit_17) { + obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT, + GFP_KERNEL); + } + } else { + bitmap_free(obj->bit_17); + obj->bit_17 = NULL; + } + + return 0; +} + +/** + * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * + * Sets the tiling mode of an object, returning the required swizzling of + * bit 6 of addresses in the object. + * + * Called by the user via ioctl. 
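+ *
+ * Illustrative userspace sketch (editor's addition, not part of the
+ * original documentation; drmIoctl() is the libdrm helper and the field
+ * values are made up):
+ *
+ *	struct drm_i915_gem_set_tiling arg = {
+ *		.handle = handle,
+ *		.tiling_mode = I915_TILING_X,
+ *		.stride = 4096,
+ *	};
+ *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &arg);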
+ * + * Returns: + * Zero on success, negative errno on failure. + */ +int +i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_set_tiling *args = data; + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + /* + * The tiling mode of proxy objects is handled by its generator, and + * not allowed to be changed by userspace. + */ + if (i915_gem_object_is_proxy(obj)) { + err = -ENXIO; + goto err; + } + + if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { + err = -EINVAL; + goto err; + } + + if (args->tiling_mode == I915_TILING_NONE) { + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } else { + if (args->tiling_mode == I915_TILING_X) + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + else + args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + + /* If we can't handle the swizzling, make it untiled. */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { + args->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + args->stride = 0; + } + } + + err = mutex_lock_interruptible(&dev->struct_mutex); + if (err) + goto err; + + err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); + mutex_unlock(&dev->struct_mutex); + + /* We have to maintain this existing ABI... */ + args->stride = i915_gem_object_get_stride(obj); + args->tiling_mode = i915_gem_object_get_tiling(obj); + +err: + i915_gem_object_put(obj); + return err; +} + +/** + * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode + * @dev: DRM device + * @data: data pointer for the ioctl + * @file: DRM file for the ioctl call + * + * Returns the current tiling mode and required bit 6 swizzling for the object. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. 
+ */ +int +i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_get_tiling *args = data; + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj; + int err = -ENOENT; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, args->handle); + if (obj) { + args->tiling_mode = + READ_ONCE(obj->tiling_and_stride) & TILING_MASK; + err = 0; + } + rcu_read_unlock(); + if (unlikely(err)) + return err; + + switch (args->tiling_mode) { + case I915_TILING_X: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + break; + default: + case I915_TILING_NONE: + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + break; + } + + /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ + if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) + args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; + else + args->phys_swizzle_mode = args->swizzle_mode; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c new file mode 100644 index 000000000000..ccac73b72597 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -0,0 +1,832 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2012-2014 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include + +#include "i915_gem_ioctls.h" +#include "i915_gem_object.h" +#include "i915_trace.h" +#include "intel_drv.h" + +struct i915_mm_struct { + struct mm_struct *mm; + struct drm_i915_private *i915; + struct i915_mmu_notifier *mn; + struct hlist_node node; + struct kref kref; + struct work_struct work; +}; + +#if defined(CONFIG_MMU_NOTIFIER) +#include + +struct i915_mmu_notifier { + spinlock_t lock; + struct hlist_node node; + struct mmu_notifier mn; + struct rb_root_cached objects; + struct i915_mm_struct *mm; +}; + +struct i915_mmu_object { + struct i915_mmu_notifier *mn; + struct drm_i915_gem_object *obj; + struct interval_tree_node it; +}; + +static void add_object(struct i915_mmu_object *mo) +{ + GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); + interval_tree_insert(&mo->it, &mo->mn->objects); +} + +static void del_object(struct i915_mmu_object *mo) +{ + if (RB_EMPTY_NODE(&mo->it.rb)) + return; + + interval_tree_remove(&mo->it, &mo->mn->objects); + RB_CLEAR_NODE(&mo->it.rb); +} + +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ + struct i915_mmu_object *mo = obj->userptr.mmu_object; + + /* + * During mm_invalidate_range we need to cancel any userptr that + * overlaps the range being invalidated. Doing so requires the + * struct_mutex, and that risks recursion. In order to cause + * recursion, the user must alias the userptr address space with + * a GTT mmapping (possible with a MAP_FIXED) - then when we have + * to invalidate that mmaping, mm_invalidate_range is called with + * the userptr address *and* the struct_mutex held. To prevent that + * we set a flag under the i915_mmu_notifier spinlock to indicate + * whether this object is valid. 
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+	struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+	/*
+	 * During mm_invalidate_range we need to cancel any userptr that
+	 * overlaps the range being invalidated. Doing so requires the
+	 * struct_mutex, and that risks recursion. In order to cause
+	 * recursion, the user must alias the userptr address space with
+	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+	 * to invalidate that mmapping, mm_invalidate_range is called with
+	 * the userptr address *and* the struct_mutex held. To prevent that
+	 * we set a flag under the i915_mmu_notifier spinlock to indicate
+	 * whether this object is valid.
+	 */
+	if (!mo)
+		return;
+
+	spin_lock(&mo->mn->lock);
+	if (value)
+		add_object(mo);
+	else
+		del_object(mo);
+	spin_unlock(&mo->mn->lock);
+}
+
+static int
+userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+				  const struct mmu_notifier_range *range)
+{
+	struct i915_mmu_notifier *mn =
+		container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it;
+	struct mutex *unlock = NULL;
+	unsigned long end;
+	int ret = 0;
+
+	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
+		return 0;
+
+	/* interval ranges are inclusive, but invalidate range is exclusive */
+	end = range->end - 1;
+
+	spin_lock(&mn->lock);
+	it = interval_tree_iter_first(&mn->objects, range->start, end);
+	while (it) {
+		struct drm_i915_gem_object *obj;
+
+		if (!mmu_notifier_range_blockable(range)) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		/*
+		 * The mmu_object is released late when destroying the
+		 * GEM object so it is entirely possible to gain a
+		 * reference on an object in the process of being freed
+		 * since our serialisation is via the spinlock and not
+		 * the struct_mutex - and consequently use it after it
+		 * is freed and then double free it. To prevent that
+		 * use-after-free we only acquire a reference on the
+		 * object if it is not in the process of being destroyed.
+		 */
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		if (!kref_get_unless_zero(&obj->base.refcount)) {
+			it = interval_tree_iter_next(it, range->start, end);
+			continue;
+		}
+		spin_unlock(&mn->lock);
+
+		if (!unlock) {
+			unlock = &mn->mm->i915->drm.struct_mutex;
+
+			switch (mutex_trylock_recursive(unlock)) {
+			default:
+			case MUTEX_TRYLOCK_FAILED:
+				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
+					i915_gem_object_put(obj);
+					return -EINTR;
+				}
+				/* fall through */
+			case MUTEX_TRYLOCK_SUCCESS:
+				break;
+
+			case MUTEX_TRYLOCK_RECURSIVE:
+				unlock = ERR_PTR(-EEXIST);
+				break;
+			}
+		}
+
+		ret = i915_gem_object_unbind(obj);
+		if (ret == 0)
+			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+		i915_gem_object_put(obj);
+		if (ret)
+			goto unlock;
+
+		spin_lock(&mn->lock);
+
+		/*
+		 * As we do not (yet) protect the mmu from concurrent insertion
+		 * over this range, there is no guarantee that this search will
+		 * terminate given a pathological workload.
+ */ + it = interval_tree_iter_first(&mn->objects, range->start, end); + } + spin_unlock(&mn->lock); + +unlock: + if (!IS_ERR_OR_NULL(unlock)) + mutex_unlock(unlock); + + return ret; + +} + +static const struct mmu_notifier_ops i915_gem_userptr_notifier = { + .invalidate_range_start = userptr_mn_invalidate_range_start, +}; + +static struct i915_mmu_notifier * +i915_mmu_notifier_create(struct i915_mm_struct *mm) +{ + struct i915_mmu_notifier *mn; + + mn = kmalloc(sizeof(*mn), GFP_KERNEL); + if (mn == NULL) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&mn->lock); + mn->mn.ops = &i915_gem_userptr_notifier; + mn->objects = RB_ROOT_CACHED; + mn->mm = mm; + + return mn; +} + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ + struct i915_mmu_object *mo; + + mo = fetch_and_zero(&obj->userptr.mmu_object); + if (!mo) + return; + + spin_lock(&mo->mn->lock); + del_object(mo); + spin_unlock(&mo->mn->lock); + kfree(mo); +} + +static struct i915_mmu_notifier * +i915_mmu_notifier_find(struct i915_mm_struct *mm) +{ + struct i915_mmu_notifier *mn; + int err = 0; + + mn = mm->mn; + if (mn) + return mn; + + mn = i915_mmu_notifier_create(mm); + if (IS_ERR(mn)) + err = PTR_ERR(mn); + + down_write(&mm->mm->mmap_sem); + mutex_lock(&mm->i915->mm_lock); + if (mm->mn == NULL && !err) { + /* Protected by mmap_sem (write-lock) */ + err = __mmu_notifier_register(&mn->mn, mm->mm); + if (!err) { + /* Protected by mm_lock */ + mm->mn = fetch_and_zero(&mn); + } + } else if (mm->mn) { + /* + * Someone else raced and successfully installed the mmu + * notifier, we can cancel our own errors. + */ + err = 0; + } + mutex_unlock(&mm->i915->mm_lock); + up_write(&mm->mm->mmap_sem); + + if (mn && !IS_ERR(mn)) + kfree(mn); + + return err ? ERR_PTR(err) : mm->mn; +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + struct i915_mmu_notifier *mn; + struct i915_mmu_object *mo; + + if (flags & I915_USERPTR_UNSYNCHRONIZED) + return capable(CAP_SYS_ADMIN) ? 
0 : -EPERM; + + if (WARN_ON(obj->userptr.mm == NULL)) + return -EINVAL; + + mn = i915_mmu_notifier_find(obj->userptr.mm); + if (IS_ERR(mn)) + return PTR_ERR(mn); + + mo = kzalloc(sizeof(*mo), GFP_KERNEL); + if (!mo) + return -ENOMEM; + + mo->mn = mn; + mo->obj = obj; + mo->it.start = obj->userptr.ptr; + mo->it.last = obj->userptr.ptr + obj->base.size - 1; + RB_CLEAR_NODE(&mo->it.rb); + + obj->userptr.mmu_object = mo; + return 0; +} + +static void +i915_mmu_notifier_free(struct i915_mmu_notifier *mn, + struct mm_struct *mm) +{ + if (mn == NULL) + return; + + mmu_notifier_unregister(&mn->mn, mm); + kfree(mn); +} + +#else + +static void +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) +{ +} + +static void +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) +{ +} + +static int +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, + unsigned flags) +{ + if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) + return -ENODEV; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + +static void +i915_mmu_notifier_free(struct i915_mmu_notifier *mn, + struct mm_struct *mm) +{ +} + +#endif + +static struct i915_mm_struct * +__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real) +{ + struct i915_mm_struct *mm; + + /* Protected by dev_priv->mm_lock */ + hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real) + if (mm->mm == real) + return mm; + + return NULL; +} + +static int +i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_mm_struct *mm; + int ret = 0; + + /* During release of the GEM object we hold the struct_mutex. This + * precludes us from calling mmput() at that time as that may be + * the last reference and so call exit_mmap(). exit_mmap() will + * attempt to reap the vma, and if we were holding a GTT mmap + * would then call drm_gem_vm_close() and attempt to reacquire + * the struct mutex. So in order to avoid that recursion, we have + * to defer releasing the mm reference until after we drop the + * struct_mutex, i.e. we need to schedule a worker to do the clean + * up. 
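The shape described in this comment -- drop the last reference under a lock, unhook, then push the heavyweight teardown to a workqueue so it never runs with the contended lock held -- is a recurring kernel pattern. A stripped-down sketch of just that pattern, with every demo_* name illustrative rather than taken from the driver:

  #include <linux/kref.h>
  #include <linux/mutex.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>

  struct demo_obj {
  	struct kref kref;
  	struct work_struct work;
  };

  static DEFINE_MUTEX(demo_lock);

  static void demo_free_worker(struct work_struct *work)
  {
  	struct demo_obj *obj = container_of(work, struct demo_obj, work);

  	/* Heavy teardown runs here, with demo_lock long released. */
  	kfree(obj);
  }

  static void demo_release(struct kref *kref)
  {
  	struct demo_obj *obj = container_of(kref, struct demo_obj, kref);

  	/* kref_put_mutex() invokes this with demo_lock held ... */
  	mutex_unlock(&demo_lock);

  	/* ... so defer anything that must not nest under that lock. */
  	INIT_WORK(&obj->work, demo_free_worker);
  	schedule_work(&obj->work);
  }

  static void demo_put(struct demo_obj *obj)
  {
  	kref_put_mutex(&obj->kref, demo_release, &demo_lock);
  }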
+ */ + mutex_lock(&dev_priv->mm_lock); + mm = __i915_mm_struct_find(dev_priv, current->mm); + if (mm == NULL) { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (mm == NULL) { + ret = -ENOMEM; + goto out; + } + + kref_init(&mm->kref); + mm->i915 = to_i915(obj->base.dev); + + mm->mm = current->mm; + mmgrab(current->mm); + + mm->mn = NULL; + + /* Protected by dev_priv->mm_lock */ + hash_add(dev_priv->mm_structs, + &mm->node, (unsigned long)mm->mm); + } else + kref_get(&mm->kref); + + obj->userptr.mm = mm; +out: + mutex_unlock(&dev_priv->mm_lock); + return ret; +} + +static void +__i915_mm_struct_free__worker(struct work_struct *work) +{ + struct i915_mm_struct *mm = container_of(work, typeof(*mm), work); + i915_mmu_notifier_free(mm->mn, mm->mm); + mmdrop(mm->mm); + kfree(mm); +} + +static void +__i915_mm_struct_free(struct kref *kref) +{ + struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); + + /* Protected by dev_priv->mm_lock */ + hash_del(&mm->node); + mutex_unlock(&mm->i915->mm_lock); + + INIT_WORK(&mm->work, __i915_mm_struct_free__worker); + queue_work(mm->i915->mm.userptr_wq, &mm->work); +} + +static void +i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) +{ + if (obj->userptr.mm == NULL) + return; + + kref_put_mutex(&obj->userptr.mm->kref, + __i915_mm_struct_free, + &to_i915(obj->base.dev)->mm_lock); + obj->userptr.mm = NULL; +} + +struct get_pages_work { + struct work_struct work; + struct drm_i915_gem_object *obj; + struct task_struct *task; +}; + +static struct sg_table * +__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, + struct page **pvec, int num_pages) +{ + unsigned int max_segment = i915_sg_segment_size(); + struct sg_table *st; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + +alloc_table: + ret = __sg_alloc_table_from_pages(st, pvec, num_pages, + 0, num_pages << PAGE_SHIFT, + max_segment, + GFP_KERNEL); + if (ret) { + kfree(st); + return ERR_PTR(ret); + } + + ret = i915_gem_gtt_prepare_pages(obj, st); + if (ret) { + sg_free_table(st); + + if (max_segment > PAGE_SIZE) { + max_segment = PAGE_SIZE; + goto alloc_table; + } + + kfree(st); + return ERR_PTR(ret); + } + + sg_page_sizes = i915_sg_page_sizes(st->sgl); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return st; +} + +static void +__i915_gem_userptr_get_pages_worker(struct work_struct *_work) +{ + struct get_pages_work *work = container_of(_work, typeof(*work), work); + struct drm_i915_gem_object *obj = work->obj; + const int npages = obj->base.size >> PAGE_SHIFT; + struct page **pvec; + int pinned, ret; + + ret = -ENOMEM; + pinned = 0; + + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (pvec != NULL) { + struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!i915_gem_object_is_readonly(obj)) + flags |= FOLL_WRITE; + + ret = -EFAULT; + if (mmget_not_zero(mm)) { + down_read(&mm->mmap_sem); + while (pinned < npages) { + ret = get_user_pages_remote + (work->task, mm, + obj->userptr.ptr + pinned * PAGE_SIZE, + npages - pinned, + flags, + pvec + pinned, NULL, NULL); + if (ret < 0) + break; + + pinned += ret; + } + up_read(&mm->mmap_sem); + mmput(mm); + } + } + + mutex_lock(&obj->mm.lock); + if (obj->userptr.work == &work->work) { + struct sg_table *pages = ERR_PTR(ret); + + if (pinned == npages) { + pages = __i915_gem_userptr_alloc_pages(obj, pvec, + npages); + if (!IS_ERR(pages)) { + pinned = 0; + pages = NULL; + } + } + + obj->userptr.work = 
ERR_CAST(pages); + if (IS_ERR(pages)) + __i915_gem_userptr_set_active(obj, false); + } + mutex_unlock(&obj->mm.lock); + + release_pages(pvec, pinned); + kvfree(pvec); + + i915_gem_object_put(obj); + put_task_struct(work->task); + kfree(work); +} + +static struct sg_table * +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) +{ + struct get_pages_work *work; + + /* Spawn a worker so that we can acquire the + * user pages without holding our mutex. Access + * to the user pages requires mmap_sem, and we have + * a strict lock ordering of mmap_sem, struct_mutex - + * we already hold struct_mutex here and so cannot + * call gup without encountering a lock inversion. + * + * Userspace will keep on repeating the operation + * (thanks to EAGAIN) until either we hit the fast + * path or the worker completes. If the worker is + * cancelled or superseded, the task is still run + * but the results ignored. (This leads to + * complications that we may have a stray object + * refcount that we need to be wary of when + * checking for existing objects during creation.) + * If the worker encounters an error, it reports + * that error back to this function through + * obj->userptr.work = ERR_PTR. + */ + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (work == NULL) + return ERR_PTR(-ENOMEM); + + obj->userptr.work = &work->work; + + work->obj = i915_gem_object_get(obj); + + work->task = current; + get_task_struct(work->task); + + INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); + queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); + + return ERR_PTR(-EAGAIN); +} + +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) +{ + const int num_pages = obj->base.size >> PAGE_SHIFT; + struct mm_struct *mm = obj->userptr.mm->mm; + struct page **pvec; + struct sg_table *pages; + bool active; + int pinned; + + /* If userspace should engineer that these pages are replaced in + * the vma between us binding this page into the GTT and completion + * of rendering... Their loss. If they change the mapping of their + * pages they need to create a new bo to point to the new vma. + * + * However, that still leaves open the possibility of the vma + * being copied upon fork. Which falls under the same userspace + * synchronisation issue as a regular bo, except that this time + * the process may not be expecting that a particular piece of + * memory is tied to the GPU. + * + * Fortunately, we can hook into the mmu_notifier in order to + * discard the page references prior to anything nasty happening + * to the vma (discard or cloning) which should prevent the more + * egregious cases from causing harm. 
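The -EAGAIN contract sketched in this comment means the caller simply repeats the operation until the worker has populated the pages (libdrm's drmIoctl() restarts on EAGAIN/EINTR for exactly this reason). In userspace terms, assuming nothing more than a plain ioctl wrapper:

  #include <errno.h>
  #include <sys/ioctl.h>

  /* Retry while the kernel's deferred get_user_pages worker runs;
   * EINTR is restarted on the same principle.
   */
  static int ioctl_restartable(int fd, unsigned long request, void *arg)
  {
  	int ret;

  	do {
  		ret = ioctl(fd, request, arg);
  	} while (ret == -1 && (errno == EAGAIN || errno == EINTR));

  	return ret;
  }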
+ */ + + if (obj->userptr.work) { + /* active flag should still be held for the pending work */ + if (IS_ERR(obj->userptr.work)) + return PTR_ERR(obj->userptr.work); + else + return -EAGAIN; + } + + pvec = NULL; + pinned = 0; + + if (mm == current->mm) { + pvec = kvmalloc_array(num_pages, sizeof(struct page *), + GFP_KERNEL | + __GFP_NORETRY | + __GFP_NOWARN); + if (pvec) /* defer to worker if malloc fails */ + pinned = __get_user_pages_fast(obj->userptr.ptr, + num_pages, + !i915_gem_object_is_readonly(obj), + pvec); + } + + active = false; + if (pinned < 0) { + pages = ERR_PTR(pinned); + pinned = 0; + } else if (pinned < num_pages) { + pages = __i915_gem_userptr_get_pages_schedule(obj); + active = pages == ERR_PTR(-EAGAIN); + } else { + pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); + active = !IS_ERR(pages); + } + if (active) + __i915_gem_userptr_set_active(obj, true); + + if (IS_ERR(pages)) + release_pages(pvec, pinned); + kvfree(pvec); + + return PTR_ERR_OR_ZERO(pages); +} + +static void +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + struct sgt_iter sgt_iter; + struct page *page; + + /* Cancel any inflight work and force them to restart their gup */ + obj->userptr.work = NULL; + __i915_gem_userptr_set_active(obj, false); + if (!pages) + return; + + __i915_gem_object_release_shmem(obj, pages, true); + i915_gem_gtt_finish_pages(obj, pages); + + for_each_sgt_page(page, sgt_iter, pages) { + if (obj->mm.dirty) + set_page_dirty(page); + + mark_page_accessed(page); + put_page(page); + } + obj->mm.dirty = false; + + sg_free_table(pages); + kfree(pages); +} + +static void +i915_gem_userptr_release(struct drm_i915_gem_object *obj) +{ + i915_gem_userptr_release__mmu_notifier(obj); + i915_gem_userptr_release__mm_struct(obj); +} + +static int +i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) +{ + if (obj->userptr.mmu_object) + return 0; + + return i915_gem_userptr_init__mmu_notifier(obj, 0); +} + +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_ASYNC_CANCEL, + .get_pages = i915_gem_userptr_get_pages, + .put_pages = i915_gem_userptr_put_pages, + .dmabuf_export = i915_gem_userptr_dmabuf_export, + .release = i915_gem_userptr_release, +}; + +/* + * Creates a new mm object that wraps some normal memory from the process + * context - user memory. + * + * We impose several restrictions upon the memory being mapped + * into the GPU. + * 1. It must be page aligned (both start/end addresses, i.e ptr and size). + * 2. It must be normal system memory, not a pointer into another map of IO + * space (e.g. it must not be a GTT mmapping of another object). + * 3. We only allow a bo as large as we could in theory map into the GTT, + * that is we limit the size to the total size of the GTT. + * 4. The bo is marked as being snoopable. The backing pages are left + * accessible directly by the CPU, but reads and writes by the GPU may + * incur the cost of a snoop (unless you have an LLC architecture). + * + * Synchronisation between multiple users and the GPU is left to userspace + * through the normal set-domain-ioctl. The kernel will enforce that the + * GPU relinquishes the VMA before it is returned back to the system + * i.e. upon free(), munmap() or process termination. 
However, the userspace + * malloc() library may not immediately relinquish the VMA after free() and + * instead reuse it whilst the GPU is still reading and writing to the VMA. + * Caveat emptor. + * + * Also note, that the object created here is not currently a "first class" + * object, in that several ioctls are banned. These are the CPU access + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use + * direct access via your pointer rather than use those ioctls. Another + * restriction is that we do not allow userptr surfaces to be pinned to the + * hardware and so we reject any attempt to create a framebuffer out of a + * userptr. + * + * If you think this is a good interface to use to pass GPU memory between + * drivers, please use dma-buf instead. In fact, wherever possible use + * dma-buf instead. + */ +int +i915_gem_userptr_ioctl(struct drm_device *dev, + void *data, + struct drm_file *file) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_userptr *args = data; + struct drm_i915_gem_object *obj; + int ret; + u32 handle; + + if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { + /* We cannot support coherent userptr objects on hw without + * LLC and broken snooping. + */ + return -ENODEV; + } + + if (args->flags & ~(I915_USERPTR_READ_ONLY | + I915_USERPTR_UNSYNCHRONIZED)) + return -EINVAL; + + if (!args->user_size) + return -EINVAL; + + if (offset_in_page(args->user_ptr | args->user_size)) + return -EINVAL; + + if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) + return -EFAULT; + + if (args->flags & I915_USERPTR_READ_ONLY) { + struct i915_hw_ppgtt *ppgtt; + + /* + * On almost all of the older hw, we cannot tell the GPU that + * a page is readonly. + */ + ppgtt = dev_priv->kernel_context->ppgtt; + if (!ppgtt || !ppgtt->vm.has_read_only) + return -ENODEV; + } + + obj = i915_gem_object_alloc(); + if (obj == NULL) + return -ENOMEM; + + drm_gem_private_object_init(dev, &obj->base, args->user_size); + i915_gem_object_init(obj, &i915_gem_userptr_ops); + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->write_domain = I915_GEM_DOMAIN_CPU; + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + obj->userptr.ptr = args->user_ptr; + if (args->flags & I915_USERPTR_READ_ONLY) + i915_gem_object_set_readonly(obj); + + /* And keep a pointer to the current->mm for resolving the user pages + * at binding. This means that we need to hook into the mmu_notifier + * in order to detect if the mmu is destroyed. 
+	 */
+	ret = i915_gem_userptr_init__mm_struct(obj);
+	if (ret == 0)
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	i915_gem_object_put(obj);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
+{
+	mutex_init(&dev_priv->mm_lock);
+	hash_init(dev_priv->mm_structs);
+
+	dev_priv->mm.userptr_wq =
+		alloc_workqueue("i915-userptr-acquire",
+				WQ_HIGHPRI | WQ_UNBOUND,
+				0);
+	if (!dev_priv->mm.userptr_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
+{
+	destroy_workqueue(dev_priv->mm.userptr_wq);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
new file mode 100644
index 000000000000..099f3397aada
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+
+int i915_gemfs_init(struct drm_i915_private *i915)
+{
+	struct file_system_type *type;
+	struct vfsmount *gemfs;
+
+	type = get_fs_type("tmpfs");
+	if (!type)
+		return -ENODEV;
+
+	gemfs = kern_mount(type);
+	if (IS_ERR(gemfs))
+		return PTR_ERR(gemfs);
+
+	/*
+	 * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most
+	 * likely 2M. Note that within_size may overallocate huge-pages, if say
+	 * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under
+	 * memory pressure shmem should split any huge-pages which can be
+	 * shrunk.
+	 */
+
+	if (has_transparent_hugepage()) {
+		struct super_block *sb = gemfs->mnt_sb;
+		/* FIXME: Disabled until we get W/A for read BW issue.
*/ + char options[] = "huge=never"; + int flags = 0; + int err; + + err = sb->s_op->remount_fs(sb, &flags, options); + if (err) { + kern_unmount(gemfs); + return err; + } + } + + i915->mm.gemfs = gemfs; + + return 0; +} + +void i915_gemfs_fini(struct drm_i915_private *i915) +{ + kern_unmount(i915->mm.gemfs); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h new file mode 100644 index 000000000000..2a1e59af3e4a --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#ifndef __I915_GEMFS_H__ +#define __I915_GEMFS_H__ + +struct drm_i915_private; + +int i915_gemfs_init(struct drm_i915_private *i915); + +void i915_gemfs_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c new file mode 100644 index 000000000000..824f3761314c --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static int huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return -ENOMEM; + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return -ENOMEM; + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); + + return 0; + +err: + huge_free_pages(obj, pages); + + return -ENOMEM; +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + unsigned int cache_level; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + 
+	drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
+	i915_gem_object_init(obj, &huge_ops);
+
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->scratch = phys_size;
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
new file mode 100644
index 000000000000..7b437f06a9be
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -0,0 +1,1780 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "i915_selftest.h"
+
+#include "gem/i915_gem_pm.h"
+
+#include "igt_gem_utils.h"
+#include "mock_context.h"
+
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+#include "selftests/i915_random.h"
+
+static const unsigned int page_sizes[] = {
+	I915_GTT_PAGE_SIZE_2M,
+	I915_GTT_PAGE_SIZE_64K,
+	I915_GTT_PAGE_SIZE_4K,
+};
+
+static unsigned int get_largest_page_size(struct drm_i915_private *i915,
+					  u64 rem)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+		unsigned int page_size = page_sizes[i];
+
+		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
+			return page_size;
+	}
+
+	return 0;
+}
+
+static void huge_pages_free_pages(struct sg_table *st)
+{
+	struct scatterlist *sg;
+
+	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+		if (sg_page(sg))
+			__free_pages(sg_page(sg), get_order(sg->length));
+	}
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static int get_huge_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+	unsigned int page_mask = obj->mm.page_mask;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int sg_page_sizes;
+	u64 rem;
+
+	st = kmalloc(sizeof(*st), GFP);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	rem = obj->base.size;
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+
+	/*
+	 * Our goal here is simple: we want to greedily fill the object from
+	 * largest to smallest page-size, while ensuring that we use *every*
+	 * page-size as per the given page-mask.
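The do/while pair that follows implements that greedy walk. The same arithmetic, pulled out into a standalone illustration (not driver code): a mask of 2M|64K|4K over a 2M+64K+4K object emits exactly one chunk of each size, because the inner loop reserves just enough of the remainder for the smaller sizes still pending in the mask.

  #include <stdio.h>

  /* Emit chunks from the largest page size down, guaranteeing every
   * size left in the mask is used at least once before rem runs out.
   */
  static void greedy_fill(unsigned long long rem, unsigned long page_mask)
  {
  	do {
  		unsigned long page_size =
  			1ul << (8 * sizeof(long) - 1 - __builtin_clzl(page_mask));

  		do {
  			printf("chunk of %lu bytes\n", page_size);
  			rem -= page_size;
  			if (!rem)
  				return;
  		} while (rem - ((page_size - 1) & page_mask) >= page_size);

  		page_mask &= page_size - 1;
  	} while (page_mask);
  }

  int main(void)
  {
  	/* 2M | 64K | 4K over 2M + 64K + 4K: one chunk of each size. */
  	greedy_fill((2ull << 20) + (64 << 10) + 4096,
  		    (2ul << 20) | (64 << 10) | 4096);
  	return 0;
  }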
+ */ + do { + unsigned int bit = ilog2(page_mask); + unsigned int page_size = BIT(bit); + int order = get_order(page_size); + + do { + struct page *page; + + GEM_BUG_ON(order >= MAX_ORDER); + page = alloc_pages(GFP | __GFP_ZERO, order); + if (!page) + goto err; + + sg_set_page(sg, page, page_size, 0); + sg_page_sizes |= page_size; + st->nents++; + + rem -= page_size; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while ((rem - ((page_size-1) & page_mask)) >= page_size); + + page_mask &= (page_size-1); + } while (page_mask); + + if (i915_gem_gtt_prepare_pages(obj, st)) + goto err; + + obj->mm.madv = I915_MADV_DONTNEED; + + GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err: + sg_set_page(sg, NULL, 0, 0); + sg_mark_end(sg); + huge_pages_free_pages(st); + + return -ENOMEM; +} + +static void put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_pages_free_pages(pages); + + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops huge_page_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = get_huge_pages, + .put_pages = put_huge_pages, +}; + +static struct drm_i915_gem_object * +huge_pages_object(struct drm_i915_private *i915, + u64 size, + unsigned int page_mask) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &huge_page_ops); + + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + obj->mm.page_mask = page_mask; + + return obj; +} + +static int fake_get_huge_pages(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + const u64 max_len = rounddown_pow_of_two(UINT_MAX); + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + u64 rem; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { + kfree(st); + return -ENOMEM; + } + + /* Use optimal page sized chunks to fill in the sg table */ + rem = obj->base.size; + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + do { + unsigned int page_size = get_largest_page_size(i915, rem); + unsigned int len = min(page_size * div_u64(rem, page_size), + max_len); + + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = len; + sg_dma_len(sg) = len; + sg_dma_address(sg) = page_size; + + sg_page_sizes |= len; + + st->nents++; + + rem -= len; + if (!rem) { + sg_mark_end(sg); + break; + } + + sg = sg_next(sg); + } while (1); + + i915_sg_trim(st); + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; +} + +static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + unsigned int page_size; + + st = kmalloc(sizeof(*st), GFP); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, 1, GFP)) { + kfree(st); + return -ENOMEM; + } + + sg = 
st->sgl; + st->nents = 1; + + page_size = get_largest_page_size(i915, obj->base.size); + GEM_BUG_ON(!page_size); + + sg->offset = 0; + sg->length = obj->base.size; + sg_dma_len(sg) = obj->base.size; + sg_dma_address(sg) = page_size; + + obj->mm.madv = I915_MADV_DONTNEED; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; +#undef GFP +} + +static void fake_free_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void fake_put_huge_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_huge_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages, + .put_pages = fake_put_huge_pages, +}; + +static const struct drm_i915_gem_object_ops fake_ops_single = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_huge_pages_single, + .put_pages = fake_put_huge_pages, +}; + +static struct drm_i915_gem_object * +fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (size >> PAGE_SHIFT > UINT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + + if (single) + i915_gem_object_init(obj, &fake_ops_single); + else + i915_gem_object_init(obj, &fake_ops); + + obj->write_domain = I915_GEM_DOMAIN_CPU; + obj->read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + return obj; +} + +static int igt_check_page_sizes(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + unsigned int supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj = vma->obj; + int err = 0; + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { + pr_err("unsupported page_sizes.sg=%u, supported=%u\n", + vma->page_sizes.sg & ~supported, supported); + err = -EINVAL; + } + + if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { + pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", + vma->page_sizes.gtt & ~supported, supported); + err = -EINVAL; + } + + if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { + pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", + vma->page_sizes.phys, obj->mm.page_sizes.phys); + err = -EINVAL; + } + + if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { + pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", + vma->page_sizes.sg, obj->mm.page_sizes.sg); + err = -EINVAL; + } + + if (obj->mm.page_sizes.gtt) { + pr_err("obj->page_sizes.gtt(%u) should never be set\n", + obj->mm.page_sizes.gtt); + err = -EINVAL; + } + + return err; +} + +static int igt_mock_exhaust_device_supported_pages(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int i, j, single; + int err; + + /* + * Sanity check creating objects with every valid page support + * combination for our mock device. 
+ */ + + for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { + unsigned int combination = 0; + + for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { + if (i & BIT(j)) + combination |= page_sizes[j]; + } + + mkwrite_device_info(i915)->page_sizes = combination; + + for (single = 0; single <= 1; ++single) { + obj = fake_huge_pages_object(i915, combination, !!single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_device; + } + + if (obj->base.size != combination) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, combination); + err = -EINVAL; + goto out_put; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.sg != combination) { + pr_err("page_sizes.sg=%u, expected=%u\n", + vma->page_sizes.sg, combination); + err = -EINVAL; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + + if (err) + goto out_device; + } + } + + goto out_device; + +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_device: + mkwrite_device_info(i915)->page_sizes = saved_mask; + + return err; +} + +static int igt_mock_ppgtt_misaligned_dma(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct drm_i915_gem_object *obj; + int bit; + int err; + + /* + * Sanity check dma misalignment for huge pages -- the dma addresses we + * insert into the paging structures need to always respect the page + * size alignment. + */ + + bit = ilog2(I915_GTT_PAGE_SIZE_64K); + + for_each_set_bit_from(bit, &supported, + ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + IGT_TIMEOUT(end_time); + unsigned int page_size = BIT(bit); + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + unsigned int offset; + unsigned int size = + round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; + struct i915_vma *vma; + + obj = fake_huge_pages_object(i915, size, true); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != size) { + pr_err("obj->base.size=%zu, expected=%u\n", + obj->base.size, size); + err = -EINVAL; + goto out_put; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + /* Force the page size for this object */ + obj->mm.page_sizes.sg = page_size; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != page_size) { + pr_err("page_sizes.gtt=%u, expected %u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + /* + * Try all the other valid offsets until the next + * boundary -- should always fall back to using 4K + * pages. 
+ */ + for (offset = 4096; offset < page_size; offset += 4096) { + err = i915_vma_unbind(vma); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = i915_vma_pin(vma, 0, 0, flags | offset); + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + + if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { + pr_err("page_sizes.gtt=%u, expected %llu\n", + vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); + err = -EINVAL; + } + + i915_vma_unpin(vma); + + if (err) { + i915_vma_close(vma); + goto out_unpin; + } + + if (igt_timeout(end_time, + "%s timed out at offset %x with page-size %x\n", + __func__, offset, page_size)) + break; + } + + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static void close_object_list(struct list_head *objects, + struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } +} + +static int igt_mock_ppgtt_huge_fill(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; + unsigned long page_num; + bool single = false; + LIST_HEAD(objects); + IGT_TIMEOUT(end_time); + int err = -ENODEV; + + for_each_prime_number_from(page_num, 1, max_pages) { + struct drm_i915_gem_object *obj; + u64 size = page_num << PAGE_SHIFT; + struct i915_vma *vma; + unsigned int expected_gtt = 0; + int i; + + obj = fake_huge_pages_object(i915, size, single); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + if (obj->base.size != size) { + pr_err("obj->base.size=%zd, expected=%llu\n", + obj->base.size, size); + i915_gem_object_put(obj); + err = -EINVAL; + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + break; + + err = igt_check_page_sizes(vma); + if (err) { + i915_vma_unpin(vma); + break; + } + + /* + * Figure out the expected gtt page size knowing that we go from + * largest to smallest page size sg chunks, and that we align to + * the largest page size. 
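Before the computation below, a worked example may help: with 2M/64K/4K all supported and an object of 2M+64K+4K, the descending loop accumulates 2M|64K|4K, and the trailing 4K then knocks out the 64K bit, since 64K and 4K PTEs cannot share one page table, leaving 2M|4K as the expected GTT footprint. The same arithmetic as a self-contained check (illustrative constants, not the driver's):

  #include <assert.h>

  #define SZ_4K  (4u << 10)
  #define SZ_64K (64u << 10)
  #define SZ_2M  (2u << 20)

  /* Expected GTT page sizes for a device supporting 2M/64K/4K. */
  static unsigned int expected_gtt(unsigned long long size)
  {
  	static const unsigned int sizes[] = { SZ_2M, SZ_64K, SZ_4K };
  	unsigned int expected = 0;
  	int i;

  	for (i = 0; i < 3; i++) {
  		if (size >= sizes[i]) {
  			expected |= sizes[i];
  			size &= sizes[i] - 1;
  		}
  	}

  	/* 64K PTEs cannot share a page table with 4K PTEs. */
  	if (expected & SZ_4K)
  		expected &= ~SZ_64K;

  	return expected;
  }

  int main(void)
  {
  	assert(expected_gtt(SZ_2M + SZ_64K) == (SZ_2M | SZ_64K));
  	assert(expected_gtt(SZ_2M + SZ_64K + SZ_4K) == (SZ_2M | SZ_4K));
  	return 0;
  }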
+ */ + for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { + unsigned int page_size = page_sizes[i]; + + if (HAS_PAGE_SIZES(i915, page_size) && + size >= page_size) { + expected_gtt |= page_size; + size &= page_size-1; + } + } + + GEM_BUG_ON(!expected_gtt); + GEM_BUG_ON(size); + + if (expected_gtt & I915_GTT_PAGE_SIZE_4K) + expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; + + i915_vma_unpin(vma); + + if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + break; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + break; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", + vma->page_sizes.gtt, expected_gtt, + obj->base.size, yesno(!!single)); + err = -EINVAL; + break; + } + + if (igt_timeout(end_time, + "%s timed out at size %zd\n", + __func__, obj->base.size)) + break; + + single = !single; + } + + close_object_list(&objects, ppgtt); + + if (err == -ENOMEM || err == -ENOSPC) + err = 0; + + return err; +} + +static int igt_mock_ppgtt_64K(void *arg) +{ + struct i915_hw_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + struct drm_i915_gem_object *obj; + const struct object_info { + unsigned int size; + unsigned int gtt; + unsigned int offset; + } objects[] = { + /* Cases with forced padding/alignment */ + { + .size = SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_64K + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_64K - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_4K, + .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, + .offset = 0, + }, + { + .size = SZ_2M + SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + { + .size = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_64K, + .offset = 0, + }, + /* Try without any forced padding/alignment */ + { + .size = SZ_64K, + .offset = SZ_2M, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + { + .size = SZ_128K, + .offset = SZ_2M - SZ_64K, + .gtt = I915_GTT_PAGE_SIZE_4K, + }, + }; + struct i915_vma *vma; + int i, single; + int err; + + /* + * Sanity check some of the trickiness with 64K pages -- either we can + * safely mark the whole page-table(2M block) as 64K, or we have to + * always fallback to 4K. + */ + + if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) + return 0; + + for (i = 0; i < ARRAY_SIZE(objects); ++i) { + unsigned int size = objects[i].size; + unsigned int expected_gtt = objects[i].gtt; + unsigned int offset = objects[i].offset; + unsigned int flags = PIN_USER; + + for (single = 0; single <= 1; single++) { + obj = fake_huge_pages_object(i915, size, !!single); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_object_put; + + /* + * Disable 2M pages -- We only want to use 64K/4K pages + * for this test. 
+ */ + obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object_unpin; + } + + if (offset) + flags |= PIN_OFFSET_FIXED | offset; + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_vma_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_vma_unpin; + + if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { + if (!IS_ALIGNED(vma->node.start, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.start(%llx) not aligned to 2M\n", + vma->node.start); + err = -EINVAL; + goto out_vma_unpin; + } + + if (!IS_ALIGNED(vma->node.size, + I915_GTT_PAGE_SIZE_2M)) { + pr_err("node.size(%llx) not aligned to 2M\n", + vma->node.size); + err = -EINVAL; + goto out_vma_unpin; + } + } + + if (vma->page_sizes.gtt != expected_gtt) { + pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", + vma->page_sizes.gtt, expected_gtt, i, + yesno(!!single)); + err = -EINVAL; + goto out_vma_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + } + + return 0; + +out_vma_unpin: + i915_vma_unpin(vma); +out_vma_close: + i915_vma_close(vma); +out_object_unpin: + i915_gem_object_unpin_pages(obj); +out_object_put: + i915_gem_object_put(obj); + + return err; +} + +static struct i915_vma * +gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) +{ + struct drm_i915_private *i915 = vma->vm->i915; + const int gen = INTEL_GEN(i915); + unsigned int count = vma->size >> PAGE_SHIFT; + struct drm_i915_gem_object *obj; + struct i915_vma *batch; + unsigned int size; + u32 *cmd; + int n; + int err; + + size = (1 + 4 * count) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = val; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 
MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = val; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = val; + } + + offset += PAGE_SIZE; + } + + *cmd = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(i915); + + i915_gem_object_unpin_map(obj); + + batch = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER); + if (err) + goto err; + + return batch; + +err: + i915_gem_object_put(obj); + + return ERR_PTR(err); +} + +static int gpu_write(struct i915_vma *vma, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 dword, + u32 value) +{ + struct i915_request *rq; + struct i915_vma *batch; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + err = i915_gem_object_set_to_gtt_domain(vma->obj, true); + if (err) + return err; + + batch = gpu_write_dw(vma, dword * sizeof(u32), value); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto err_request; + + i915_gem_object_set_active_reference(batch->obj); + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto err_request; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); +err_request: + if (err) + i915_request_skip(rq, err); + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_close(batch); + + return err; +} + +static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) +{ + unsigned int needs_flush; + unsigned long n; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { + u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); + + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(ptr, PAGE_SIZE); + + if (ptr[dword] != val) { + pr_err("n=%lu ptr[%u]=%u, val=%u\n", + n, dword, ptr[dword], val); + kunmap_atomic(ptr); + err = -EINVAL; + break; + } + + kunmap_atomic(ptr); + } + + i915_gem_object_finish_access(obj); + + return err; +} + +static int __igt_write_huge(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + u64 size, u64 offset, + u32 dword, u32 val) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_unbind(vma); + if (err) + goto out_vma_close; + + err = i915_vma_pin(vma, size, 0, flags | offset); + if (err) { + /* + * The ggtt may have some pages reserved so + * refrain from erroring out. 
+		 */
+		if (err == -ENOSPC && i915_is_ggtt(vm))
+			err = 0;
+
+		goto out_vma_close;
+	}
+
+	err = igt_check_page_sizes(vma);
+	if (err)
+		goto out_vma_unpin;
+
+	err = gpu_write(vma, ctx, engine, dword, val);
+	if (err) {
+		pr_err("gpu-write failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+	err = cpu_check(obj, dword, val);
+	if (err) {
+		pr_err("cpu-check failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+out_vma_unpin:
+	i915_vma_unpin(vma);
+out_vma_close:
+	i915_vma_destroy(vma);
+
+	return err;
+}
+
+static int igt_write_huge(struct i915_gem_context *ctx,
+			  struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_address_space *vm =
+		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
+	struct intel_engine_cs *engine;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	unsigned int max_page_size;
+	unsigned int id;
+	u64 max;
+	u64 num;
+	u64 size;
+	int *order;
+	int i, n;
+	int err = 0;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	size = obj->base.size;
+	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+		size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+
+	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+	max = div_u64((vm->total - size), max_page_size);
+
+	n = 0;
+	for_each_engine(engine, i915, id) {
+		if (!intel_engine_can_store_dword(engine)) {
+			pr_info("store-dword-imm not supported on engine=%u\n",
+				id);
+			continue;
+		}
+		engines[n++] = engine;
+	}
+
+	if (!n)
+		return 0;
+
+	/*
+	 * To keep things interesting when alternating between engines in our
+	 * randomized order, let's also make feeding to the same engine a few
+	 * times in succession a possibility by enlarging the permutation array.
+	 */
+	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+	if (!order)
+		return -ENOMEM;
+
+	/*
+	 * Try various offsets in an ascending/descending fashion until we
+	 * time out -- we want to avoid issues hidden by effectively always
+	 * using offset = 0.
+	 */
+	i = 0;
+	for_each_prime_number_from(num, 0, max) {
+		u64 offset_low = num * max_page_size;
+		u64 offset_high = (max - num) * max_page_size;
+		u32 dword = offset_in_page(num) / 4;
+
+		engine = engines[order[i] % n];
+		i = (i + 1) % (n * I915_NUM_ENGINES);
+
+		/*
+		 * In order to utilize 64K pages we need to both pad the vma
+		 * size and ensure the vma offset is at the start of the pt
+		 * boundary, however to improve coverage we opt for testing both
+		 * aligned and unaligned offsets.
+		 */
+		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+			offset_low = round_down(offset_low,
+						I915_GTT_PAGE_SIZE_2M);
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_low,
+				       dword, num + 1);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_high,
+				       dword, num + 1);
+		if (err)
+			break;
+
+		if (igt_timeout(end_time,
+				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+				__func__, engine->id, offset_low, offset_high,
+				max_page_size))
+			break;
+	}
+
+	kfree(order);
+
+	return err;
+}
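The probe pattern igt_write_huge() generates is easier to see in isolation: each prime num tests one offset near the bottom of the address space and its mirror near the top, so coverage converges from both ends instead of clustering at offset 0. A toy version of the walk (plain increments standing in for the prime iterator):

  #include <stdio.h>

  static void probe_offsets(unsigned long long max,
  			  unsigned long long page_size)
  {
  	unsigned long long num;

  	/* num only takes prime values in the real test */
  	for (num = 0; num <= max; num++) {
  		unsigned long long lo = num * page_size;
  		unsigned long long hi = (max - num) * page_size;

  		printf("probe low=%llx high=%llx\n", lo, hi);
  	}
  }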
+
+static int igt_ppgtt_exhaust_huge(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	static unsigned int pages[ARRAY_SIZE(page_sizes)];
+	struct drm_i915_gem_object *obj;
+	unsigned int size_mask;
+	unsigned int page_mask;
+	int n, i;
+	int err = -ENODEV;
+
+	if (supported == I915_GTT_PAGE_SIZE_4K)
+		return 0;
+
+	/*
+	 * Sanity check creating objects with a varying mix of page sizes --
+	 * ensuring that our writes land in the right place.
+	 */
+
+	n = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
+		pages[n++] = BIT(i);
+
+	for (size_mask = 2; size_mask < BIT(n); size_mask++) {
+		unsigned int size = 0;
+
+		for (i = 0; i < n; i++) {
+			if (size_mask & BIT(i))
+				size |= pages[i];
+		}
+
+		/*
+		 * For our page mask we want to enumerate all the page-size
+		 * combinations which will fit into our chosen object size.
+		 */
+		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
+			unsigned int page_sizes = 0;
+
+			for (i = 0; i < n; i++) {
+				if (page_mask & BIT(i))
+					page_sizes |= pages[i];
+			}
+
+			/*
+			 * Ensure that we can actually fill the given object
+			 * with our chosen page mask.
+			 */
+			if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
+				continue;
+
+			obj = huge_pages_object(i915, size, page_sizes);
+			if (IS_ERR(obj)) {
+				err = PTR_ERR(obj);
+				goto out_device;
+			}
+
+			err = i915_gem_object_pin_pages(obj);
+			if (err) {
+				i915_gem_object_put(obj);
+
+				if (err == -ENOMEM) {
+					pr_info("unable to get pages, size=%u, pages=%u\n",
+						size, page_sizes);
+					err = 0;
+					break;
+				}
+
+				pr_err("pin_pages failed, size=%u, pages=%u\n",
+				       size_mask, page_mask);
+
+				goto out_device;
+			}
+
+			/* Force the page-size for the gtt insertion */
+			obj->mm.page_sizes.sg = page_sizes;
+
+			err = igt_write_huge(ctx, obj);
+			if (err) {
+				pr_err("exhaust write-huge failed with size=%u\n",
+				       size);
+				goto out_unpin;
+			}
+
+			i915_gem_object_unpin_pages(obj);
+			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			i915_gem_object_put(obj);
+		}
+	}
+
+	goto out_device;
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_put(obj);
+out_device:
+	mkwrite_device_info(i915)->page_sizes = supported;
+
+	return err;
+}
+
+static int igt_ppgtt_internal_huge(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_object *obj;
+	static const unsigned int sizes[] = {
+		SZ_64K,
+		SZ_128K,
+		SZ_256K,
+		SZ_512K,
+		SZ_1M,
+		SZ_2M,
+	};
+	int i;
+	int err;
+
+	/*
+	 * Sanity check that the HW uses huge pages correctly through internal
+	 * -- ensure that our writes land in the right place.
+ */ + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("internal unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("internal write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) +{ + return i915->mm.gemfs && has_transparent_hugepage(); +} + +static int igt_ppgtt_gemfs_huge(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + static const unsigned int sizes[] = { + SZ_2M, + SZ_4M, + SZ_8M, + SZ_16M, + SZ_32M, + }; + int i; + int err; + + /* + * Sanity check that the HW uses huge pages correctly through gemfs -- + * ensure that our writes land in the right place. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(sizes); ++i) { + unsigned int size = sizes[i]; + + obj = i915_gem_object_create_shmem(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", + size); + goto out_unpin; + } + + err = igt_write_huge(ctx, obj); + if (err) { + pr_err("gemfs write-huge failed with size=%u\n", + size); + goto out_unpin; + } + + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + + return 0; + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_ppgtt_pin_update(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *dev_priv = ctx->i915; + unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; + int first, last; + int err; + + /* + * Make sure there's no funny business when doing a PIN_UPDATE -- in the + * past we had a subtle issue with being able to incorrectly do multiple + * alloc va ranges on the same object when doing a PIN_UPDATE, which + * resulted in some pretty nasty bugs, though only when using + * huge-gtt-pages. 
+ */ + + if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) { + pr_info("48b PPGTT not supported, skipping\n"); + return 0; + } + + first = ilog2(I915_GTT_PAGE_SIZE_64K); + last = ilog2(I915_GTT_PAGE_SIZE_2M); + + for_each_set_bit_from(first, &supported, last + 1) { + unsigned int page_size = BIT(first); + + obj = i915_gem_object_create_internal(dev_priv, page_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, SZ_2M, 0, flags); + if (err) + goto out_close; + + if (vma->page_sizes.sg < page_size) { + pr_info("Unable to allocate page-size %x, finishing test early\n", + page_size); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + if (vma->page_sizes.gtt != page_size) { + dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); + + /* + * The only valid reason for this to ever fail would be + * if the dma-mapper screwed us over when we did the + * dma_map_sg(), since it has the final say over the dma + * address. + */ + if (IS_ALIGNED(addr, page_size)) { + pr_err("page_sizes.gtt=%u, expected=%u\n", + vma->page_sizes.gtt, page_size); + err = -EINVAL; + } else { + pr_info("dma address misaligned, finishing test early\n"); + } + + goto out_unpin; + } + + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_gem_object_put(obj); + } + + obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + /* + * Make sure we don't end up with something like where the pde is still + * pointing to the 2M page, and the pt we just filled-in is dangling -- + * we can check this by writing to the first page where it would then + * land in the now stale 2M page. + */ + + err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +static int igt_tmpfs_fallback(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct vfsmount *gemfs = i915->mm.gemfs; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *vaddr; + int err = 0; + + /* + * Make sure that we don't burst into a ball of flames upon falling back + * to tmpfs, which we rely on if on the off-chance we encouter a failure + * when setting up gemfs. 
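
The hazard igt_ppgtt_pin_update() above guards against is easier to see in a toy model that assumes nothing of the real page-table code: if a rebind fills a new 4K page table but the directory entry is left pointing at the old 2M page, writes land in the stale page and the freshly filled table dangles:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define PT_ENTRIES 512

    struct pde {
            bool huge;                       /* still pointing at the 2M page? */
            unsigned int huge_page[PT_ENTRIES];
            unsigned int pt[PT_ENTRIES];     /* 4K view, one value per page here */
    };

    static void gpu_write(struct pde *pde, unsigned int idx, unsigned int v)
    {
            if (pde->huge)
                    pde->huge_page[idx] = v; /* lands in the stale 2M page */
            else
                    pde->pt[idx] = v;
    }

    int main(void)
    {
            struct pde pde;

            memset(&pde, 0, sizeof(pde));
            pde.huge = true; /* bound with a 2M page first */

            /* Rebind with 4K pages but "forget" to repoint the PDE... */
            gpu_write(&pde, 0, 0xdeadbeaf);

            /* ...and the CPU-side check of the 4K view sees nothing. */
            printf("cpu_check: %#x (expected 0xdeadbeaf)\n", pde.pt[0]);
            return 0;
    }
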
+ */ + + i915->mm.gemfs = NULL; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_restore; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + *vaddr = 0xdeadbeaf; + + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_restore: + i915->mm.gemfs = gemfs; + + return err; +} + +static int igt_shrink_thp(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags = PIN_USER; + int err; + + /* + * Sanity check shrinking huge-paged object -- make sure nothing blows + * up. + */ + + if (!igt_can_allocate_thp(i915)) { + pr_info("missing THP support, skipping\n"); + return 0; + } + + obj = i915_gem_object_create_shmem(i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { + pr_info("failed to allocate THP, finishing test early\n"); + goto out_unpin; + } + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); + if (err) + goto out_unpin; + + i915_vma_unpin(vma); + + /* + * Now that the pages are *unpinned* shrink-all should invoke + * shmem to truncate our pages. 
+ */ + i915_gem_shrink_all(i915); + if (i915_gem_object_has_pages(obj)) { + pr_err("shrink-all didn't truncate the pages\n"); + err = -EINVAL; + goto out_close; + } + + if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { + pr_err("residual page-size bits left\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_vma_pin(vma, 0, 0, flags); + if (err) + goto out_close; + + err = cpu_check(obj, 0, 0xdeadbeaf); + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); + + return err; +} + +int i915_gem_huge_page_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_ppgtt_misaligned_dma), + SUBTEST(igt_mock_ppgtt_huge_fill), + SUBTEST(igt_mock_ppgtt_64K), + }; + struct drm_i915_private *dev_priv; + struct i915_hw_ppgtt *ppgtt; + int err; + + dev_priv = mock_gem_device(); + if (!dev_priv) + return -ENOMEM; + + /* Pretend to be a device which supports the 48b PPGTT */ + mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; + mkwrite_device_info(dev_priv)->ppgtt_size = 48; + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + + if (!i915_vm_is_4lvl(&ppgtt->vm)) { + pr_err("failed to create 48b PPGTT\n"); + err = -EINVAL; + goto out_close; + } + + /* If we were ever hit this then it's time to mock the 64K scratch */ + if (!i915_vm_has_scratch_64K(&ppgtt->vm)) { + pr_err("PPGTT missing 64K scratch page\n"); + err = -EINVAL; + goto out_close; + } + + err = i915_subtests(tests, ppgtt); + +out_close: + i915_ppgtt_put(ppgtt); + +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + drm_dev_put(&dev_priv->drm); + + return err; +} + +int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_shrink_thp), + SUBTEST(igt_ppgtt_pin_update), + SUBTEST(igt_tmpfs_fallback), + SUBTEST(igt_ppgtt_exhaust_huge), + SUBTEST(igt_ppgtt_gemfs_huge), + SUBTEST(igt_ppgtt_internal_huge), + }; + struct drm_file *file; + struct i915_gem_context *ctx; + intel_wakeref_t wakeref; + int err; + + if (!HAS_PPGTT(dev_priv)) { + pr_info("PPGTT not supported, skipping live-selftests\n"); + return 0; + } + + if (i915_terminally_wedged(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + wakeref = intel_runtime_pm_get(dev_priv); + + ctx = live_context(dev_priv, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + if (ctx->ppgtt) + ctx->ppgtt->vm.scrub_64K = true; + + err = i915_subtests(tests, ctx); + +out_unlock: + intel_runtime_pm_put(dev_priv, wakeref); + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + + return err; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..5495875b48b3 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -0,0 +1,379 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#include + +#include "i915_selftest.h" +#include "selftests/i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + void *map; + u32 *cpu; + int err; + + err = 
i915_gem_object_prepare_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + cpu = map + offset_in_page(offset); + + if (needs_clflush & CLFLUSH_BEFORE) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + *cpu = v; + + if (needs_clflush & CLFLUSH_AFTER) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + kunmap_atomic(map); + i915_gem_object_finish_access(obj); + + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + void *map; + u32 *cpu; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + cpu = map + offset_in_page(offset); + + if (needs_clflush & CLFLUSH_BEFORE) + drm_clflush_virt_range(cpu, sizeof(*cpu)); + + *v = *cpu; + + kunmap_atomic(map); + i915_gem_object_finish_access(obj); + + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + u32 __iomem *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + iowrite32(v, &map[offset / sizeof(*map)]); + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + u32 __iomem *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = ioread32(&map[offset / sizeof(*map)]); + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + u32 *map; + int err; + + err = i915_gem_object_set_to_wc_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + u32 *map; + int err; + + err = i915_gem_object_set_to_wc_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_request_create(i915->engine[RCS0]->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + i915_vma_unpin(vma); + return PTR_ERR(cs); + } + + if (INTEL_GEN(i915) >= 8) { + 
*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; + } else if (INTEL_GEN(i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; + } + intel_ring_advance(rq, cs); + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + i915_request_add(rq); + + return err; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_fence_registers(struct drm_i915_private *i915) +{ + return !i915_terminally_wedged(i915); +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + if (i915_terminally_wedged(i915)) + return false; + + return intel_engine_can_store_dword(i915->engine[RCS0]); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, needs_fence_registers }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + unsigned long count, n; + u32 *offsets, *values; + int err = 0; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. 
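
The sampling pattern set up just below touches one dword per 64-byte cacheline while sliding the dword slot within the line, so neighbouring lines never probe the same intra-line offset; the formula in isolation (PAGE_SIZE and CACHELINE as plain constants):

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define CACHELINE 64

    int main(void)
    {
            /* offsets[i] = i * 64 + 4 * (i % 16): cacheline i, dword i mod 16 */
            for (int i = 0; i < PAGE_SIZE / CACHELINE; i++)
                    printf("line %2d -> offset %4d\n",
                           i, i * CACHELINE + 4 * (i % 16));
            return 0;
    }
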
+ */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c new file mode 100644 index 000000000000..653ae08a277f --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -0,0 +1,1736 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017 Intel Corporation + */ + +#include + +#include "gem/i915_gem_pm.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" +#include "selftests/mock_drm.h" +#include "selftests/mock_gem_device.h" + +#include "huge_gem_object.h" +#include "igt_gem_utils.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static int live_nop_switch(void *arg) +{ + const unsigned int nctx = 1024; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context **ctx; + enum intel_engine_id id; + intel_wakeref_t wakeref; + struct 
igt_live_test t; + struct drm_file *file; + unsigned long n; + int err = -ENODEV; + + /* + * Create as many contexts as we can feasibly get away with + * and check we can switch between them rapidly. + * + * Serves as very simple stress test for submission and HW switching + * between contexts. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(i915); + + ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + err = -ENOMEM; + goto out_unlock; + } + + for (n = 0; n < nctx; n++) { + ctx[n] = live_context(i915, file); + if (IS_ERR(ctx[n])) { + err = PTR_ERR(ctx[n]); + goto out_unlock; + } + } + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + unsigned long end_time, prime; + ktime_t times[2] = {}; + + times[0] = ktime_get_raw(); + for (n = 0; n < nctx; n++) { + rq = igt_request_alloc(ctx[n], engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + i915_request_add(rq); + } + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Failed to populated %d contexts\n", nctx); + i915_gem_set_wedged(i915); + err = -EIO; + goto out_unlock; + } + + times[1] = ktime_get_raw(); + + pr_info("Populated %d contexts on %s in %lluns\n", + nctx, engine->name, ktime_to_ns(times[1] - times[0])); + + err = igt_live_test_begin(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + end_time = jiffies + i915_selftest.timeout_jiffies; + for_each_prime_number_from(prime, 2, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + rq = igt_request_alloc(ctx[n % nctx], engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unlock; + } + + /* + * This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. 
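
The latency figure reported further down is plain division: a batch of prime requests implies prime - 1 back-to-back context switches. A trivial sketch, modelling ktime_t as nanoseconds:

    #include <stdio.h>

    /* Submitting `prime` no-op requests took `elapsed_ns`; every request
     * after the first implies one switch. */
    static unsigned long long per_switch_ns(unsigned long long elapsed_ns,
                                            unsigned long prime)
    {
            return elapsed_ns / (prime - 1);
    }

    int main(void)
    {
            printf("~%llu ns per switch\n", per_switch_ns(81910000ULL, 8191));
            return 0;
    }
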
+ */ + + i915_request_add(rq); + } + if (i915_request_wait(rq, + I915_WAIT_LOCKED, + HZ / 5) < 0) { + pr_err("Switching between %ld contexts timed out\n", + prime); + i915_gem_set_wedged(i915); + break; + } + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 2) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = igt_live_test_end(&t); + if (err) + goto out_unlock; + + pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); + } + +out_unlock: + intel_runtime_pm_put(i915, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + mock_file_free(i915, file); + return err; +} + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? MI_USE_GGTT : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; + struct i915_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 
+ * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_object_prepare_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_object_finish_access(obj); + obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->write_domain = 0; + return 0; +} + +static noinline int cpu_check(struct drm_i915_gem_object *obj, + unsigned int idx, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_object_prepare_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", + __builtin_return_address(0), idx, + n, real_page_count(obj), m, max, + map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != STACK_MAGIC) { + pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", + __builtin_return_address(0), idx, n, m, + map[m], STACK_MAGIC); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_object_finish_access(obj); + return err; +} + +static int file_add_object(struct drm_file *file, + struct drm_i915_gem_object *obj) +{ + int err; + + GEM_BUG_ON(obj->base.handle_count); + + /* tie the object to the drm_file for easy reaping */ + err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); + if (err < 0) + return err; + + i915_gem_object_get(obj); + obj->base.handle_count++; + return 0; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct 
i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm; + u64 size; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + err = file_add_object(file, obj); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, STACK_MAGIC); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENODEV; + + /* + * Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_object *obj = NULL; + unsigned long ncontexts, ndwords, dw; + struct igt_live_test t; + struct drm_file *file; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (!engine->context_size) + continue; /* No logical context support in HW */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + err = igt_live_test_begin(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + intel_wakeref_t wakeref; + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + if (!obj) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + + ndwords++; + ncontexts++; + } + + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); + + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, ncontexts++, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + if (err) + return err; + } + + return 0; +} + +static int igt_shared_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *parent; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_live_test t; + struct drm_file *file; + int err = 0; + + /* + * Create a few different contexts with the same mm and write + * through each ctx using the GPU making sure those writes end + * up in the expected pages of our obj. 
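
The addressing in gpu_fill() above is the subtle part: pass dw starts at logical page dw * nreal and stores one dword per logical page, so with phys_pfn = dma_pfn % nreal every real page receives value dw in dword slot dw exactly once per pass. A toy walk-through, with NREAL shrunk from the test's 1024:

    #include <stdio.h>

    #define NREAL 4 /* real pages backing the huge object */

    int main(void)
    {
            /* One gpu_fill() pass per dw; DW_PER_PAGE passes fill a page. */
            for (int dw = 0; dw < 8; dw++) {
                    for (int n = 0; n < NREAL; n++) {
                            int logical = dw * NREAL + n; /* page in the GTT view */

                            printf("pass %d: logical page %2d -> real page %d, dword %d := %d\n",
                                   dw, logical, logical % NREAL, dw, dw);
                    }
            }
            return 0;
    }
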
+ */ + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + parent = live_context(i915, file); + if (IS_ERR(parent)) { + err = PTR_ERR(parent); + goto out_unlock; + } + + if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */ + err = 0; + goto out_unlock; + } + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + unsigned long ncontexts, ndwords, dw; + struct drm_i915_gem_object *obj = NULL; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + + if (!intel_engine_can_store_dword(engine)) + continue; + + dw = 0; + ndwords = 0; + ncontexts = 0; + while (!time_after(jiffies, end_time)) { + struct i915_gem_context *ctx; + intel_wakeref_t wakeref; + + ctx = kernel_context(i915); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_test; + } + + __assign_ppgtt(ctx, parent->ppgtt); + + if (!obj) { + obj = create_test_object(parent, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + kernel_context_close(ctx); + goto out_test; + } + } + + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + kernel_context_close(ctx); + goto out_test; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + + ndwords++; + ncontexts++; + + kernel_context_close(ctx); + } + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", + ncontexts, engine->name, ndwords); + + ncontexts = dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, ncontexts++, rem); + if (err) + goto out_test; + + dw += rem; + } + } +out_test: + if (igt_live_test_end(&t)) + err = -EIO; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj; + u32 *cmd; + int err; + + if (INTEL_GEN(vma->vm->i915) < 8) + return ERR_PTR(-EINVAL); + + obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + *cmd = MI_BATCH_BUFFER_END; + + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +emit_rpcs_query(struct drm_i915_gem_object *obj, + struct intel_context *ce, + struct i915_request **rq_out) +{ + struct i915_request *rq; + struct i915_vma *batch; + struct i915_vma *vma; + int err; + + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); + + vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err 
= i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + batch = rpcs_query_batch(vma); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(batch, rq, 0); + if (err) + goto skip_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_unpin(vma); + + *rq_out = i915_request_get(rq); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + + return err; +} + +#define TEST_IDLE BIT(0) +#define TEST_BUSY BIT(1) +#define TEST_RESET BIT(2) + +static int +__sseu_prepare(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct igt_spinner **spin) +{ + struct i915_request *rq; + int ret; + + *spin = NULL; + if (!(flags & (TEST_BUSY | TEST_RESET))) + return 0; + + *spin = kzalloc(sizeof(**spin), GFP_KERNEL); + if (!*spin) + return -ENOMEM; + + ret = igt_spinner_init(*spin, i915); + if (ret) + goto err_free; + + rq = igt_spinner_create_request(*spin, + ce->gem_context, + ce->engine, + MI_NOOP); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto err_fini; + } + + i915_request_add(rq); + + if (!igt_wait_for_spinner(*spin, rq)) { + pr_err("%s: Spinner failed to start!\n", name); + ret = -ETIMEDOUT; + goto err_end; + } + + return 0; + +err_end: + igt_spinner_end(*spin); +err_fini: + igt_spinner_fini(*spin); +err_free: + kfree(fetch_and_zero(spin)); + return ret; +} + +static int +__read_slice_count(struct drm_i915_private *i915, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + struct igt_spinner *spin, + u32 *rpcs) +{ + struct i915_request *rq = NULL; + u32 s_mask, s_shift; + unsigned int cnt; + u32 *buf, val; + long ret; + + ret = emit_rpcs_query(obj, ce, &rq); + if (ret) + return ret; + + if (spin) + igt_spinner_end(spin); + + ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + if (ret < 0) + return ret; + + buf = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + return ret; + } + + if (INTEL_GEN(i915) >= 11) { + s_mask = GEN11_RPCS_S_CNT_MASK; + s_shift = GEN11_RPCS_S_CNT_SHIFT; + } else { + s_mask = GEN8_RPCS_S_CNT_MASK; + s_shift = GEN8_RPCS_S_CNT_SHIFT; + } + + val = *buf; + cnt = (val & s_mask) >> s_shift; + *rpcs = val; + + i915_gem_object_unpin_map(obj); + + return cnt; +} + +static int +__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, + const char *prefix, const char *suffix) +{ + if (slices == expected) + return 0; + + if (slices < 0) { + pr_err("%s: %s read slice count failed with %d%s\n", + name, prefix, slices, suffix); + return slices; + } + + pr_err("%s: %s slice count %d is not %u%s\n", + name, prefix, slices, expected, suffix); + + pr_info("RPCS=0x%x; %u%sx%u%s\n", + rpcs, slices, + (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", + (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, + (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? 
"*" : ""); + + return -EINVAL; +} + +static int +__sseu_finish(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + unsigned int expected, + struct igt_spinner *spin) +{ + unsigned int slices = hweight32(ce->engine->sseu.slice_mask); + u32 rpcs = 0; + int ret = 0; + + if (flags & TEST_RESET) { + ret = i915_reset_engine(ce->engine, "sseu"); + if (ret) + goto out; + } + + ret = __read_slice_count(i915, ce, obj, + flags & TEST_RESET ? NULL : spin, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); + if (ret) + goto out; + + ret = __read_slice_count(i915, ce->engine->kernel_context, obj, + NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); + +out: + if (spin) + igt_spinner_end(spin); + + if ((flags & TEST_IDLE) && ret == 0) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (ret) + return ret; + + ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); + ret = __check_rpcs(name, rpcs, ret, expected, + "Context", " after idle!"); + } + + return ret; +} + +static int +__sseu_test(struct drm_i915_private *i915, + const char *name, + unsigned int flags, + struct intel_context *ce, + struct drm_i915_gem_object *obj, + struct intel_sseu sseu) +{ + struct igt_spinner *spin = NULL; + int ret; + + ret = __sseu_prepare(i915, name, flags, ce, &spin); + if (ret) + return ret; + + ret = __intel_context_reconfigure_sseu(ce, sseu); + if (ret) + goto out_spin; + + ret = __sseu_finish(i915, name, flags, ce, obj, + hweight32(sseu.slice_mask), spin); + +out_spin: + if (spin) { + igt_spinner_end(spin); + igt_spinner_fini(spin); + kfree(spin); + } + return ret; +} + +static int +__igt_ctx_sseu(struct drm_i915_private *i915, + const char *name, + unsigned int flags) +{ + struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_sseu default_sseu = engine->sseu; + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct intel_sseu pg_sseu; + intel_wakeref_t wakeref; + struct drm_file *file; + int ret; + + if (INTEL_GEN(i915) < 9) + return 0; + + if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) + return 0; + + if (hweight32(default_sseu.slice_mask) < 2) + return 0; + + /* + * Gen11 VME friendly power-gated configuration with half enabled + * sub-slices. + */ + pg_sseu = default_sseu; + pg_sseu.slice_mask = 1; + pg_sseu.subslice_mask = + ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); + + pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", + name, flags, hweight32(default_sseu.slice_mask), + hweight32(pg_sseu.slice_mask)); + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (flags & TEST_RESET) + igt_global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + ret = PTR_ERR(ctx); + goto out_unlock; + } + i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto out_unlock; + } + + wakeref = intel_runtime_pm_get(i915); + + ce = i915_gem_context_get_engine(ctx, RCS0); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_rpm; + } + + ret = intel_context_pin(ce); + if (ret) + goto out_context; + + /* First set the default mask. */ + ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + if (ret) + goto out_fail; + + /* Then set a power-gated configuration. 
*/ + ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; + + /* Back to defaults. */ + ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); + if (ret) + goto out_fail; + + /* One last power-gated configuration for the road. */ + ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); + if (ret) + goto out_fail; + +out_fail: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + ret = -EIO; + + intel_context_unpin(ce); +out_context: + intel_context_put(ce); +out_rpm: + intel_runtime_pm_put(i915, wakeref); + i915_gem_object_put(obj); + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (flags & TEST_RESET) + igt_global_reset_unlock(i915); + + mock_file_free(i915, file); + + if (ret) + pr_err("%s: Failed with %d!\n", name, ret); + + return ret; +} + +static int igt_ctx_sseu(void *arg) +{ + struct { + const char *name; + unsigned int flags; + } *phase, phases[] = { + { .name = "basic", .flags = 0 }, + { .name = "idle", .flags = TEST_IDLE }, + { .name = "busy", .flags = TEST_BUSY }, + { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, + { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, + { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, + }; + unsigned int i; + int ret = 0; + + for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); + i++, phase++) + ret = __igt_ctx_sseu(arg, phase->name, phase->flags); + + return ret; +} + +static int igt_ctx_readonly(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj = NULL; + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + unsigned long idx, ndwords, dw; + struct igt_live_test t; + struct drm_file *file; + I915_RND_STATE(prng); + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + int err = -ENODEV; + + /* + * Create a few read-only objects (with the occasional writable object) + * and try to write into these object checking that the GPU discards + * any write to a read-only object. + */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt; + if (!ppgtt || !ppgtt->vm.has_read_only) { + err = 0; + goto out_unlock; + } + + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + unsigned int id; + + for_each_engine(engine, i915, id) { + intel_wakeref_t wakeref; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (!obj) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + if (prandom_u32_state(&prng) & 1) + i915_gem_object_set_readonly(obj); + } + + err = 0; + with_intel_runtime_pm(i915, wakeref) + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) { + obj = NULL; + dw = 0; + } + ndwords++; + } + } + pr_info("Submitted %lu dwords (across %u engines)\n", + ndwords, RUNTIME_INFO(i915)->num_engines); + + dw = 0; + idx = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + unsigned int num_writes; + + num_writes = rem; + if (i915_gem_object_is_readonly(obj)) + num_writes = 0; + + err = cpu_check(obj, idx++, num_writes); + if (err) + break; + + dw += rem; + } + +out_unlock: + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static int check_scratch(struct i915_gem_context *ctx, u64 offset) +{ + struct drm_mm_node *node = + __drm_mm_interval_first(&ctx->ppgtt->vm.mm, + offset, offset + sizeof(u32) - 1); + if (!node || node->start > offset) + return 0; + + GEM_BUG_ON(offset >= node->start + node->size); + + pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", + upper_32_bits(offset), lower_32_bits(offset)); + return -EINVAL; +} + +static int write_to_scratch(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, u32 value) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cmd; + int err; + + GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + if (INTEL_GEN(i915) >= 8) { + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + } else { + *cmd++ = 0; + *cmd++ = offset; + } + *cmd++ = value; + *cmd = MI_BATCH_BUFFER_END; + __i915_gem_object_flush_map(obj, 0, 64); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); + if (err) + goto err; + + err = check_scratch(ctx, offset); + if (err) + goto err_unpin; + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(vma, rq, 0); + if (err) + goto skip_request; + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_request_add(rq); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(vma); +err: + i915_gem_object_put(obj); + return err; +} + +static int read_from_scratch(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u64 offset, u32 *value) +{ + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
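
write_to_scratch() above emits the same store at two address widths; the shape of that emit as a sketch, with MI_STORE_DWORD_IMM_GEN4 reduced to an opaque placeholder (the real opcode lives in the driver headers):

    #include <stdint.h>
    #include <stdio.h>

    #define MI_STORE_DWORD_IMM_GEN4 0u /* placeholder opcode for the sketch */

    static uint32_t *emit_store_dword(uint32_t *cs, int gen, uint64_t addr,
                                      uint32_t value)
    {
            *cs++ = MI_STORE_DWORD_IMM_GEN4;
            if (gen >= 8) {         /* 48b address: low dword, then high */
                    *cs++ = (uint32_t)addr;
                    *cs++ = (uint32_t)(addr >> 32);
            } else {                /* 32b address: padding dword, then address */
                    *cs++ = 0;
                    *cs++ = (uint32_t)addr;
            }
            *cs++ = value;
            return cs;
    }

    int main(void)
    {
            uint32_t batch[8];

            printf("gen8 store is %td dwords\n",
                   emit_store_dword(batch, 8, 0x10000, 0xc5c5c5c5) - batch);
            return 0;
    }
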
*/ + const u32 result = 0x100; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cmd; + int err; + + GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + memset(cmd, POISON_INUSE, PAGE_SIZE); + if (INTEL_GEN(i915) >= 8) { + *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; + *cmd++ = RCS_GPR0; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = MI_STORE_REGISTER_MEM_GEN8; + *cmd++ = RCS_GPR0; + *cmd++ = result; + *cmd++ = 0; + } else { + *cmd++ = MI_LOAD_REGISTER_MEM; + *cmd++ = RCS_GPR0; + *cmd++ = offset; + *cmd++ = MI_STORE_REGISTER_MEM; + *cmd++ = RCS_GPR0; + *cmd++ = result; + } + *cmd = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); + if (err) + goto err; + + err = check_scratch(ctx, offset); + if (err) + goto err_unpin; + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto err_request; + + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + if (err) + goto skip_request; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + i915_request_add(rq); + + err = i915_gem_object_set_to_cpu_domain(obj, false); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + *value = cmd[result / sizeof(*cmd)]; + i915_gem_object_unpin_map(obj); + i915_gem_object_put(obj); + + return 0; + +skip_request: + i915_request_skip(rq, err); +err_request: + i915_request_add(rq); +err_unpin: + i915_vma_unpin(vma); +err: + i915_gem_object_put(obj); + return err; +} + +static int igt_vm_isolation(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_a, *ctx_b; + struct intel_engine_cs *engine; + intel_wakeref_t wakeref; + struct igt_live_test t; + struct drm_file *file; + I915_RND_STATE(prng); + unsigned long count; + unsigned int id; + u64 vm_total; + int err; + + if (INTEL_GEN(i915) < 7) + return 0; + + /* + * The simple goal here is that a write into one context is not + * observed in a second (separate page tables and scratch). 
+ */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&i915->drm.struct_mutex); + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + ctx_a = live_context(i915, file); + if (IS_ERR(ctx_a)) { + err = PTR_ERR(ctx_a); + goto out_unlock; + } + + ctx_b = live_context(i915, file); + if (IS_ERR(ctx_b)) { + err = PTR_ERR(ctx_b); + goto out_unlock; + } + + /* We can only test vm isolation, if the vm are distinct */ + if (ctx_a->ppgtt == ctx_b->ppgtt) + goto out_unlock; + + vm_total = ctx_a->ppgtt->vm.total; + GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total); + vm_total -= I915_GTT_PAGE_SIZE; + + wakeref = intel_runtime_pm_get(i915); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + unsigned long this = 0; + + if (!intel_engine_can_store_dword(engine)) + continue; + + while (!__igt_timeout(end_time, NULL)) { + u32 value = 0xc5c5c5c5; + u64 offset; + + div64_u64_rem(i915_prandom_u64_state(&prng), + vm_total, &offset); + offset &= -sizeof(u32); + offset += I915_GTT_PAGE_SIZE; + + err = write_to_scratch(ctx_a, engine, + offset, 0xdeadbeef); + if (err == 0) + err = read_from_scratch(ctx_b, engine, + offset, &value); + if (err) + goto out_rpm; + + if (value) { + pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", + engine->name, value, + upper_32_bits(offset), + lower_32_bits(offset), + this); + err = -EINVAL; + goto out_rpm; + } + + this++; + } + count += this; + } + pr_info("Checked %lu scratch offsets across %d engines\n", + count, RUNTIME_INFO(i915)->num_engines); + +out_rpm: + intel_runtime_pm_put(i915, wakeref); +out_unlock: + if (igt_live_test_end(&t)) + err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +static __maybe_unused const char * +__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) +{ + struct intel_engine_cs *engine; + intel_engine_mask_t tmp; + + if (engines == ALL_ENGINES) + return "all"; + + for_each_engine_masked(engine, i915, engines, tmp) + return engine->name; + + return "none"; +} + +static void mock_barrier_task(void *data) +{ + unsigned int *counter = data; + + ++*counter; +} + +static int mock_context_barrier(void *arg) +{ +#undef pr_fmt +#define pr_fmt(x) "context_barrier_task():" # x + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; + struct i915_request *rq; + unsigned int counter; + int err; + + /* + * The context barrier provides us with a callback after it emits + * a request; useful for retiring old state after loading new. 
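
The offset derivation in igt_vm_isolation() above reduces to: uniform over the vm, aligned down to a dword, and biased past the first page so the scratch page is never a target. In isolation:

    #include <inttypes.h>
    #include <stdio.h>

    #define GTT_PAGE_SIZE 4096ull

    static uint64_t pick_offset(uint64_t rnd, uint64_t vm_total)
    {
            uint64_t offset = rnd % (vm_total - GTT_PAGE_SIZE);

            offset &= -(uint64_t)sizeof(uint32_t); /* align down to u32 */
            return offset + GTT_PAGE_SIZE;         /* skip the scratch page */
    }

    int main(void)
    {
            printf("%" PRIx64 "\n",
                   pick_offset(0x123456789abcdefull, 1ull << 32));
            return 0;
    }
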
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + ctx = mock_context(i915, "mock"); + if (!ctx) { + err = -ENOMEM; + goto unlock; + } + + counter = 0; + err = context_barrier_task(ctx, 0, + NULL, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + if (counter == 0) { + pr_err("Did not retire immediately with 0 engines\n"); + err = -EINVAL; + goto out; + } + + counter = 0; + err = context_barrier_task(ctx, ALL_ENGINES, + NULL, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + if (counter == 0) { + pr_err("Did not retire immediately for all unused engines\n"); + err = -EINVAL; + goto out; + } + + rq = igt_request_alloc(ctx, i915->engine[RCS0]); + if (IS_ERR(rq)) { + pr_err("Request allocation failed!\n"); + goto out; + } + i915_request_add(rq); + + counter = 0; + context_barrier_inject_fault = BIT(RCS0); + err = context_barrier_task(ctx, ALL_ENGINES, + NULL, mock_barrier_task, &counter); + context_barrier_inject_fault = 0; + if (err == -ENXIO) + err = 0; + else + pr_err("Did not hit fault injection!\n"); + if (counter != 0) { + pr_err("Invoked callback on error!\n"); + err = -EIO; + } + if (err) + goto out; + + counter = 0; + err = context_barrier_task(ctx, ALL_ENGINES, + NULL, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + mock_device_flush(i915); + if (counter == 0) { + pr_err("Did not retire on each active engines\n"); + err = -EINVAL; + goto out; + } + +out: + mock_context_close(ctx); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +#undef pr_fmt +#define pr_fmt(x) x +} + +int i915_gem_context_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(mock_context_barrier), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + return err; +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_switch), + SUBTEST(igt_ctx_exec), + SUBTEST(igt_ctx_readonly), + SUBTEST(igt_ctx_sseu), + SUBTEST(igt_shared_ctx_exec), + SUBTEST(igt_vm_isolation), + }; + + if (i915_terminally_wedged(dev_priv)) + return 0; + + return i915_subtests(tests, dev_priv); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..b7431712de66 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -0,0 +1,386 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "mock_dmabuf.h" +#include "selftests/mock_gem_device.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + 
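
memchr_inv(), relied on by the dmabuf tests below to verify that writes through one mapping appear through the other, returns the first byte differing from c (NULL if the range is uniform); a userspace stand-in with the same semantics:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Kernel semantics: first byte in [s, s + n) != c, or NULL. */
    static const void *memchr_inv(const void *s, int c, size_t n)
    {
            const unsigned char *p = s;

            for (; n--; p++)
                    if (*p != (unsigned char)c)
                            return p;
            return NULL;
    }

    int main(void)
    {
            char page[64];

            memset(page, 0xaa, sizeof(page));
            printf("uniform: %s\n",
                   memchr_inv(page, 0xaa, sizeof(page)) ? "no" : "yes");
            return 0;
    }
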
struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + 
pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_export_kmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + err = PTR_ERR(dmabuf); + pr_err("i915_gem_prime_export failed with err=%d\n", err); + return err; + } + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + pr_err("Exported page[0] not initialiased to zero!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto err; + } + memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + ptr = dma_buf_kmap(dmabuf, 1); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + + if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 1, ptr); + pr_err("Exported page[1] not set to 0xaa!\n"); + err = -EINVAL; + goto err; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_kunmap(dmabuf, 1, ptr); + + ptr = dma_buf_kmap(dmabuf, 0); + if (!ptr) { + pr_err("dma_buf_kmap failed\n"); + err = -ENOMEM; + goto err; + } + if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { + dma_buf_kunmap(dmabuf, 0, ptr); + pr_err("Exported page[0] did not retain 0xc5!\n"); + err = -EINVAL; + goto err; + } + dma_buf_kunmap(dmabuf, 0, ptr); + + ptr = dma_buf_kmap(dmabuf, 2); + if (ptr) { + pr_err("Erroneously kmapped beyond the end of the object!\n"); + dma_buf_kunmap(dmabuf, 2, ptr); + err = -EINVAL; + goto err; + } + + ptr = dma_buf_kmap(dmabuf, -1); + if (ptr) { + pr_err("Erroneously kmapped before the start of the object!\n"); + dma_buf_kunmap(dmabuf, -1, ptr); + err = -EINVAL; + goto err; + } + + err = 0; +err: + dma_buf_put(dmabuf); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + 
SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + SUBTEST(igt_dmabuf_export_kmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + return err; +} + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 87da01230179..12c90d8fe0fb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -7,8 +7,8 @@ #include #include "gt/intel_gt_pm.h" +#include "huge_gem_object.h" #include "i915_selftest.h" -#include "selftests/huge_gem_object.h" #include "selftests/igt_flush_test.h" struct tile { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c new file mode 100644 index 000000000000..2b6db6f799de --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "huge_gem_object.h" +#include "selftests/igt_flush_test.h" +#include "selftests/mock_gem_device.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.vm.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_put(&i915->drm); + return err; +} + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c new file mode 100644 index 
000000000000..b232e6d2cd92 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "igt_gem_utils.h" + +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + +#include "i915_request.h" + +struct i915_request * +igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ce = i915_gem_context_get_engine(ctx, engine->id); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + + return rq; +} diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h new file mode 100644 index 000000000000..0f17251cf75d --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef __IGT_GEM_UTILS_H__ +#define __IGT_GEM_UTILS_H__ + +struct i915_request; +struct i915_gem_context; +struct intel_engine_cs; + +struct i915_request * +igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); + +#endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c new file mode 100644 index 000000000000..68d50da035e6 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "mock_context.h" +#include "selftests/mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) + goto err_free; + RCU_INIT_POINTER(ctx->engines, e); + + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); + INIT_LIST_HEAD(&ctx->hw_id_link); + mutex_init(&ctx->mutex); + + ret = i915_gem_context_pin_hw_id(ctx); + if (ret < 0) + goto err_engines; + + if (name) { + struct i915_hw_ppgtt *ppgtt; + + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ppgtt = mock_ppgtt(i915, name); + if (!ppgtt) + goto err_put; + + __set_ppgtt(ctx, ppgtt); + } + + return ctx; + +err_engines: + free_engines(rcu_access_pointer(ctx->engines)); +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + context_close(ctx); +} + +void mock_init_contexts(struct drm_i915_private *i915) +{ + init_contexts(i915); +} + +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file) +{ + struct i915_gem_context *ctx; + int err; + + lockdep_assert_held(&i915->drm.struct_mutex); + + ctx = i915_gem_create_context(i915, 0); + if (IS_ERR(ctx)) + return ctx; + + err = gem_context_register(ctx, file->driver_priv); + if (err < 0) + goto err_ctx; + + return ctx; 
+ +err_ctx: + context_close(ctx); + return ERR_PTR(err); +} + +struct i915_gem_context * +kernel_context(struct drm_i915_private *i915) +{ + return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL); +} + +void kernel_context_close(struct i915_gem_context *ctx) +{ + context_close(ctx); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h new file mode 100644 index 000000000000..0b926653914f --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +void mock_init_contexts(struct drm_i915_private *i915); + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file); + +struct i915_gem_context *kernel_context(struct drm_i915_private *i915); +void kernel_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..b9e059d4328a --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops 
mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .map = mock_dmabuf_kmap, + .unmap = mock_dmabuf_kunmap, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..f0f8bbd82dfc --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h new file mode 100644 index 000000000000..370360b4a148 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5b31e1e05ddd..c78ec0b58e77 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -4,8 +4,10 @@ * Copyright © 2019 Intel Corporation */ +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" + #include "i915_drv.h" -#include "i915_gem_context.h" #include "i915_globals.h" #include "intel_context.h" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1c83ea9adac0..672dde71a46c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -24,10 +24,13 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "intel_engine.h" #include "intel_engine_pm.h" +#include "intel_context.h" #include "intel_lrc.h" #include "intel_reset.h" diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 38a8e55a7c85..448f3c0d8704 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,6 +133,8 @@ */ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gem_render_state.h" #include "i915_vgpu.h" diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index e029aee87adf..c2bba82bcc16 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -24,7 +24,15 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ -#include "intel_engine.h" +#include + +struct drm_printer; + +struct drm_i915_private; +struct i915_gem_context; +struct i915_request; +struct intel_context; +struct intel_engine_cs; /* Execlists regs */ #define RING_ELSP(base) _MMIO((base) + 0x230) @@ -96,10 +104,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN -struct drm_printer; - -struct drm_i915_private; - void intel_execlists_set_default_submission(struct intel_engine_cs *engine); void intel_lr_context_reset(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 8c60f7550f9c..377bc546a68f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -7,6 +7,8 @@ #include #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gpu_error.h" #include "i915_irq.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index ac93080bd863..66d5a52d505c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -31,9 +31,12 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gem_render_state.h" #include "i915_trace.h" +#include "intel_context.h" #include "intel_reset.h" #include "intel_workarounds.h" diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ce4bcca3f83c..133d069244f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "intel_context.h" #include "intel_workarounds.h" /** diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2941916b37bf..6d7562769eb2 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -22,8 +22,9 @@ * */ +#include "gem/i915_gem_context.h" + #include "i915_drv.h" -#include "i915_gem_context.h" #include "intel_context.h" #include "intel_engine_pm.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 48a51739b926..690d77f5ecf6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,19 +24,21 @@ #include +#include "gem/i915_gem_context.h" #include "intel_engine_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" -#include "selftests/igt_gem_utils.h" #include "selftests/igt_reset.h" #include "selftests/igt_wedge_me.h" #include "selftests/igt_atomic.h" -#include "selftests/mock_context.h" #include "selftests/mock_drm.h" +#include "gem/selftests/mock_context.h" +#include "gem/selftests/igt_gem_utils.h" + #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ struct hang { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index a8c50900e2d4..dfacc46ae7d3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -6,15 +6,18 @@ #include +#include "gem/i915_gem_pm.h" #include "gt/intel_reset.h" + #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" -#include 
"selftests/igt_gem_utils.h" #include "selftests/igt_live_test.h" #include "selftests/igt_spinner.h" #include "selftests/lib_sw_fence.h" -#include "selftests/mock_context.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" static int live_sanitycheck(void *arg) { diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index f9c9e7291187..9040cae38fc5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -4,17 +4,19 @@ * Copyright © 2018 Intel Corporation */ +#include "gem/i915_gem_pm.h" #include "i915_selftest.h" #include "intel_reset.h" #include "selftests/igt_flush_test.h" -#include "selftests/igt_gem_utils.h" #include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" #include "selftests/igt_wedge_me.h" -#include "selftests/mock_context.h" #include "selftests/mock_drm.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + static const struct wo_register { enum intel_platform platform; u32 reg; diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 96e1edf21b3f..2998999e8568 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -34,6 +34,7 @@ */ #include "i915_drv.h" +#include "gt/intel_context.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 3a691447f76c..d66bf77f55fd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -35,8 +35,11 @@ #include +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" + #include "i915_drv.h" -#include "i915_gem_pm.h" #include "gvt.h" #define RING_CTX_OFF(x) \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 344beab229a0..7ab8340af991 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,10 +32,10 @@ #include #include +#include "gem/i915_gem_context.h" #include "gt/intel_reset.h" #include "i915_debugfs.h" -#include "i915_gem_context.h" #include "i915_irq.h" #include "intel_csr.h" #include "intel_dp.h" diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a1f43dc5a8b5..5ca1594f3075 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,6 +47,7 @@ #include #include +#include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gt/intel_gt_pm.h" #include "gt/intel_reset.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 596af542afea..38da46e773a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,7 +80,7 @@ #include "intel_wopcm.h" #include "i915_gem.h" -#include "i915_gem_context.h" +#include "gem/i915_gem_context_types.h" #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0570907cc9d2..096e31e3df92 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,7 +38,11 @@ #include #include +#include "gem/i915_gem_clflush.h" +#include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" +#include "gem/i915_gem_pm.h" +#include "gem/i915_gemfs.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt_pm.h" #include 
"gt/intel_mocs.h" @@ -46,9 +50,6 @@ #include "gt/intel_workarounds.h" #include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_gemfs.h" -#include "i915_gem_pm.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -2371,9 +2372,5 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" -#include "selftests/huge_gem_object.c" -#include "selftests/huge_pages.c" -#include "selftests/i915_gem_object.c" -#include "selftests/i915_gem_coherency.c" #include "selftests/i915_gem.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c deleted file mode 100644 index 8e74c23cbd91..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include "i915_drv.h" -#include "intel_frontbuffer.h" -#include "i915_gem_clflush.h" - -static DEFINE_SPINLOCK(clflush_lock); - -struct clflush { - struct dma_fence dma; /* Must be first for dma_fence_free() */ - struct i915_sw_fence wait; - struct work_struct work; - struct drm_i915_gem_object *obj; -}; - -static const char *i915_clflush_get_driver_name(struct dma_fence *fence) -{ - return DRIVER_NAME; -} - -static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) -{ - return "clflush"; -} - -static void i915_clflush_release(struct dma_fence *fence) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), dma); - - i915_sw_fence_fini(&clflush->wait); - - BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); - dma_fence_free(&clflush->dma); -} - -static const struct dma_fence_ops i915_clflush_ops = { - .get_driver_name = i915_clflush_get_driver_name, - .get_timeline_name = i915_clflush_get_timeline_name, - .release = i915_clflush_release, -}; - -static void __i915_do_clflush(struct drm_i915_gem_object *obj) -{ - GEM_BUG_ON(!i915_gem_object_has_pages(obj)); - drm_clflush_sg(obj->mm.pages); - intel_fb_obj_flush(obj, ORIGIN_CPU); -} - -static void i915_clflush_work(struct work_struct *work) -{ - struct clflush *clflush = container_of(work, typeof(*clflush), work); - struct drm_i915_gem_object *obj = clflush->obj; - - if (i915_gem_object_pin_pages(obj)) { - DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); - goto out; - } - - __i915_do_clflush(obj); - - i915_gem_object_unpin_pages(obj); - -out: - i915_gem_object_put(obj); - - dma_fence_signal(&clflush->dma); - dma_fence_put(&clflush->dma); -} - -static int __i915_sw_fence_call -i915_clflush_notify(struct i915_sw_fence *fence, - enum i915_sw_fence_notify state) -{ - struct clflush *clflush = container_of(fence, typeof(*clflush), wait); - - switch (state) { - case FENCE_COMPLETE: - schedule_work(&clflush->work); - break; - - case FENCE_FREE: - dma_fence_put(&clflush->dma); - break; - } - - return NOTIFY_DONE; -} - -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, - unsigned int flags) -{ - struct clflush *clflush; - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - * Similarly, we only access struct pages through the CPU cache, so - * anything not backed by physical memory we consider to be always - * coherent and not need clflushing. - */ - if (!i915_gem_object_has_struct_page(obj)) { - obj->cache_dirty = false; - return false; - } - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. 
- */ - if (!(flags & I915_CLFLUSH_FORCE) && - obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) - return false; - - trace_i915_gem_object_clflush(obj); - - clflush = NULL; - if (!(flags & I915_CLFLUSH_SYNC)) - clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); - if (clflush) { - GEM_BUG_ON(!obj->cache_dirty); - - dma_fence_init(&clflush->dma, - &i915_clflush_ops, - &clflush_lock, - to_i915(obj->base.dev)->mm.unordered_timeline, - 0); - i915_sw_fence_init(&clflush->wait, i915_clflush_notify); - - clflush->obj = i915_gem_object_get(obj); - INIT_WORK(&clflush->work, i915_clflush_work); - - dma_fence_get(&clflush->dma); - - i915_sw_fence_await_reservation(&clflush->wait, - obj->resv, NULL, - true, I915_FENCE_TIMEOUT, - I915_FENCE_GFP); - - reservation_object_lock(obj->resv, NULL); - reservation_object_add_excl_fence(obj->resv, &clflush->dma); - reservation_object_unlock(obj->resv); - - i915_sw_fence_commit(&clflush->wait); - } else if (obj->mm.pages) { - __i915_do_clflush(obj); - } else { - GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU); - } - - obj->cache_dirty = false; - return true; -} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h deleted file mode 100644 index f390247561b3..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_clflush.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#ifndef __I915_GEM_CLFLUSH_H__ -#define __I915_GEM_CLFLUSH_H__ - -struct drm_i915_private; -struct drm_i915_gem_object; - -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, - unsigned int flags); -#define I915_CLFLUSH_FORCE BIT(0) -#define I915_CLFLUSH_SYNC BIT(1) - -#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c deleted file mode 100644 index 5d2f8ba92b59..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ /dev/null @@ -1,2474 +0,0 @@ -/* - * Copyright © 2011-2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Ben Widawsky - * - */ - -/* - * This file implements HW context support. On gen5+ a HW context consists of an - * opaque GPU object which is referenced at times of context saves and restores. - * With RC6 enabled, the context is also referenced as the GPU enters and exists - * from RC6 (GPU has it's own internal power context, except on gen5). Though - * something like a context does exist for the media ring, the code only - * supports contexts for the render ring. - * - * In software, there is a distinction between contexts created by the user, - * and the default HW context. The default HW context is used by GPU clients - * that do not request setup of their own hardware context. The default - * context's state is never restored to help prevent programming errors. This - * would happen if a client ran and piggy-backed off another clients GPU state. - * The default context only exists to give the GPU some offset to load as the - * current to invoke a save of the context we actually care about. In fact, the - * code could likely be constructed, albeit in a more complicated fashion, to - * never use the default context, though that limits the driver's ability to - * swap out, and/or destroy other contexts. - * - * All other contexts are created as a request by the GPU client. These contexts - * store GPU state, and thus allow GPU clients to not re-emit state (and - * potentially query certain state) at any time. The kernel driver makes - * certain that the appropriate commands are inserted. - * - * The context life cycle is semi-complicated in that context BOs may live - * longer than the context itself because of the way the hardware, and object - * tracking works. Below is a very crude representation of the state machine - * describing the context life. 
- * refcount pincount active - * S0: initial state 0 0 0 - * S1: context created 1 0 0 - * S2: context is currently running 2 1 X - * S3: GPU referenced, but not current 2 0 1 - * S4: context is current, but destroyed 1 1 0 - * S5: like S3, but destroyed 1 0 1 - * - * The most common (but not all) transitions: - * S0->S1: client creates a context - * S1->S2: client submits execbuf with context - * S2->S3: other clients submits execbuf with context - * S3->S1: context object was retired - * S3->S2: clients submits another execbuf - * S2->S4: context destroy called with current context - * S3->S5->S0: destroy path - * S4->S5->S0: destroy path on current context - * - * There are two confusing terms used above: - * The "current context" means the context which is currently running on the - * GPU. The GPU has loaded its state already and has stored away the gtt - * offset of the BO. The GPU is not actively referencing the data at this - * offset, but it will on the next context switch. The only way to avoid this - * is to do a GPU reset. - * - * An "active context' is one which was previously the "current context" and is - * on the active list waiting for the next context switch to occur. Until this - * happens, the object must remain at the same gtt offset. It is therefore - * possible to destroy a context, but it is still active. - * - */ - -#include -#include - -#include - -#include "gt/intel_lrc_reg.h" - -#include "i915_drv.h" -#include "i915_globals.h" -#include "i915_trace.h" -#include "i915_user_extensions.h" - -#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 - -static struct i915_global_gem_context { - struct i915_global base; - struct kmem_cache *slab_luts; -} global; - -struct i915_lut_handle *i915_lut_handle_alloc(void) -{ - return kmem_cache_alloc(global.slab_luts, GFP_KERNEL); -} - -void i915_lut_handle_free(struct i915_lut_handle *lut) -{ - return kmem_cache_free(global.slab_luts, lut); -} - -static void lut_close(struct i915_gem_context *ctx) -{ - struct i915_lut_handle *lut, *ln; - struct radix_tree_iter iter; - void __rcu **slot; - - list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { - list_del(&lut->obj_link); - i915_lut_handle_free(lut); - } - INIT_LIST_HEAD(&ctx->handles_list); - - rcu_read_lock(); - radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { - struct i915_vma *vma = rcu_dereference_raw(*slot); - - radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - - vma->open_count--; - __i915_gem_object_release_unless_active(vma->obj); - } - rcu_read_unlock(); -} - -static struct intel_context * -lookup_user_engine(struct i915_gem_context *ctx, - unsigned long flags, - const struct i915_engine_class_instance *ci) -#define LOOKUP_USER_INDEX BIT(0) -{ - int idx; - - if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx)) - return ERR_PTR(-EINVAL); - - if (!i915_gem_context_user_engines(ctx)) { - struct intel_engine_cs *engine; - - engine = intel_engine_lookup_user(ctx->i915, - ci->engine_class, - ci->engine_instance); - if (!engine) - return ERR_PTR(-EINVAL); - - idx = engine->id; - } else { - idx = ci->engine_instance; - } - - return i915_gem_context_get_engine(ctx, idx); -} - -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate 
proxy submission. - */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. - */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - -static void __free_engines(struct i915_gem_engines *e, unsigned int count) -{ - while (count--) { - if (!e->engines[count]) - continue; - - intel_context_put(e->engines[count]); - } - kfree(e); -} - -static void free_engines(struct i915_gem_engines *e) -{ - __free_engines(e, e->num_engines); -} - -static void free_engines_rcu(struct work_struct *wrk) -{ - struct i915_gem_engines *e = - container_of(wrk, struct i915_gem_engines, rcu.work); - struct drm_i915_private *i915 = e->i915; - - mutex_lock(&i915->drm.struct_mutex); - free_engines(e); - mutex_unlock(&i915->drm.struct_mutex); -} - -static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) -{ - struct intel_engine_cs *engine; - struct i915_gem_engines *e; - enum intel_engine_id id; - - e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - - e->i915 = ctx->i915; - for_each_engine(engine, ctx->i915, id) { - struct intel_context *ce; - - ce = intel_context_create(ctx, engine); - if (IS_ERR(ce)) { - __free_engines(e, id); - return ERR_CAST(ce); - } - - e->engines[id] = ce; - } - e->num_engines = id; - - return e; -} - -static void i915_gem_context_free(struct i915_gem_context *ctx) -{ - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - - release_hw_id(ctx); - i915_ppgtt_put(ctx->ppgtt); - - free_engines(rcu_access_pointer(ctx->engines)); - mutex_destroy(&ctx->engines_mutex); - - if (ctx->timeline) - i915_timeline_put(ctx->timeline); - - kfree(ctx->name); - put_pid(ctx->pid); - - list_del(&ctx->link); - mutex_destroy(&ctx->mutex); 
- - kfree_rcu(ctx, rcu); -} - -static void contexts_free(struct drm_i915_private *i915) -{ - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); - struct i915_gem_context *ctx, *cn; - - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) - i915_gem_context_free(ctx); -} - -static void contexts_free_first(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); -} - -static void contexts_free_worker(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); - - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); -} - -void i915_gem_context_release(struct kref *ref) -{ - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; - - trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); -} - -static void context_close(struct i915_gem_context *ctx) -{ - i915_gem_context_set_closed(ctx); - - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); - - /* - * The LUT uses the VMA as a backpointer to unref the object, - * so we need to clear the LUT before we close all the VMA (inside - * the ppgtt). - */ - lut_close(ctx); - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_put(ctx); -} - -static u32 default_desc_template(const struct drm_i915_private *i915, - const struct i915_hw_ppgtt *ppgtt) -{ - u32 address_mode; - u32 desc; - - desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - - address_mode = INTEL_LEGACY_32B_CONTEXT; - if (ppgtt && i915_vm_is_4lvl(&ppgtt->vm)) - address_mode = INTEL_LEGACY_64B_CONTEXT; - desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - - if (IS_GEN(i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers - * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; - */ - - return desc; -} - -static struct i915_gem_context * -__create_context(struct drm_i915_private *dev_priv) -{ - struct i915_gem_context *ctx; - struct i915_gem_engines *e; - int err; - int i; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return ERR_PTR(-ENOMEM); - - kref_init(&ctx->ref); - list_add_tail(&ctx->link, &dev_priv->contexts.list); - ctx->i915 = dev_priv; - ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); - mutex_init(&ctx->mutex); - - mutex_init(&ctx->engines_mutex); - e = default_engines(ctx); - if (IS_ERR(e)) { - err = PTR_ERR(e); - goto err_free; - } - RCU_INIT_POINTER(ctx->engines, e); - - INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->handles_list); - INIT_LIST_HEAD(&ctx->hw_id_link); - - /* NB: Mark all slices as needing a remap so that when the context first - * loads it will restore whatever remap state already exists. If there - * is no remap info, it will be a NOP. 
*/ - ctx->remap_slice = ALL_L3_SLICES(dev_priv); - - i915_gem_context_set_bannable(ctx); - i915_gem_context_set_recoverable(ctx); - - ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = - default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); - - for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) - ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; - - return ctx; - -err_free: - kfree(ctx); - return ERR_PTR(err); -} - -static struct i915_hw_ppgtt * -__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt) -{ - struct i915_hw_ppgtt *old = ctx->ppgtt; - - ctx->ppgtt = i915_ppgtt_get(ppgtt); - ctx->desc_template = default_desc_template(ctx->i915, ppgtt); - - return old; -} - -static void __assign_ppgtt(struct i915_gem_context *ctx, - struct i915_hw_ppgtt *ppgtt) -{ - if (ppgtt == ctx->ppgtt) - return; - - ppgtt = __set_ppgtt(ctx, ppgtt); - if (ppgtt) - i915_ppgtt_put(ppgtt); -} - -static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) -{ - struct i915_gem_context *ctx; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) - return ERR_PTR(-EINVAL); - - /* Reap the most stale context */ - contexts_free_first(dev_priv); - - ctx = __create_context(dev_priv); - if (IS_ERR(ctx)) - return ctx; - - if (HAS_FULL_PPGTT(dev_priv)) { - struct i915_hw_ppgtt *ppgtt; - - ppgtt = i915_ppgtt_create(dev_priv); - if (IS_ERR(ppgtt)) { - DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", - PTR_ERR(ppgtt)); - context_close(ctx); - return ERR_CAST(ppgtt); - } - - __assign_ppgtt(ctx, ppgtt); - i915_ppgtt_put(ppgtt); - } - - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; - - timeline = i915_timeline_create(dev_priv, NULL); - if (IS_ERR(timeline)) { - context_close(ctx); - return ERR_CAST(timeline); - } - - ctx->timeline = timeline; - } - - trace_i915_context_create(ctx); - - return ctx; -} - -/** - * i915_gem_context_create_gvt - create a GVT GEM context - * @dev: drm device * - * - * This function is used to create a GVT specific GEM context. 
- * - * Returns: - * pointer to i915_gem_context on success, error pointer if failed - * - */ -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev) -{ - struct i915_gem_context *ctx; - int ret; - - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return ERR_PTR(-ENODEV); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - - ctx = i915_gem_create_context(to_i915(dev), 0); - if (IS_ERR(ctx)) - goto out; - - ret = i915_gem_context_pin_hw_id(ctx); - if (ret) { - context_close(ctx); - ctx = ERR_PTR(ret); - goto out; - } - - ctx->file_priv = ERR_PTR(-EBADF); - i915_gem_context_set_closed(ctx); /* not user accessible */ - i915_gem_context_clear_bannable(ctx); - i915_gem_context_set_force_single_submission(ctx); - if (!USES_GUC_SUBMISSION(to_i915(dev))) - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); -out: - mutex_unlock(&dev->struct_mutex); - return ctx; -} - -static void -destroy_kernel_context(struct i915_gem_context **ctxp) -{ - struct i915_gem_context *ctx; - - /* Keep the context ref so that we can free it immediately ourselves */ - ctx = i915_gem_context_get(fetch_and_zero(ctxp)); - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - context_close(ctx); - i915_gem_context_free(ctx); -} - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) -{ - struct i915_gem_context *ctx; - int err; - - ctx = i915_gem_create_context(i915, 0); - if (IS_ERR(ctx)) - return ctx; - - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - - i915_gem_context_clear_bannable(ctx); - ctx->sched.priority = I915_USER_PRIORITY(prio); - ctx->ring_size = PAGE_SIZE; - - GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); - - return ctx; -} - -static void init_contexts(struct drm_i915_private *i915) -{ - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); - - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); - - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); -} - -static bool needs_preempt_context(struct drm_i915_private *i915) -{ - return HAS_EXECLISTS(i915); -} - -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) -{ - struct i915_gem_context *ctx; - - /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); - GEM_BUG_ON(dev_priv->preempt_context); - - intel_engine_init_ctx_wa(dev_priv->engine[RCS0]); - init_contexts(dev_priv); - - /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); - if (IS_ERR(ctx)) { - DRM_ERROR("Failed to create default global context\n"); - return PTR_ERR(ctx); - } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). 
- */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; - - /* highest priority; preempting task */ - if (needs_preempt_context(dev_priv)) { - ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX); - if (!IS_ERR(ctx)) - dev_priv->preempt_context = ctx; - else - DRM_ERROR("Failed to create preempt context; disabling preemption\n"); - } - - DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? - "logical" : "fake"); - return 0; -} - -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - for_each_engine(engine, dev_priv, id) - intel_engine_lost_context(engine); -} - -void i915_gem_contexts_fini(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->drm.struct_mutex); - - if (i915->preempt_context) - destroy_kernel_context(&i915->preempt_context); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); -} - -static int context_idr_cleanup(int id, void *p, void *data) -{ - context_close(p); - return 0; -} - -static int vm_idr_cleanup(int id, void *p, void *data) -{ - i915_ppgtt_put(p); - return 0; -} - -static int gem_context_register(struct i915_gem_context *ctx, - struct drm_i915_file_private *fpriv) -{ - int ret; - - ctx->file_priv = fpriv; - if (ctx->ppgtt) - ctx->ppgtt->vm.file = fpriv; - - ctx->pid = get_task_pid(current, PIDTYPE_PID); - ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", - current->comm, pid_nr(ctx->pid)); - if (!ctx->name) { - ret = -ENOMEM; - goto err_pid; - } - - /* And finally expose ourselves to userspace via the idr */ - mutex_lock(&fpriv->context_idr_lock); - ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL); - mutex_unlock(&fpriv->context_idr_lock); - if (ret >= 0) - goto out; - - kfree(fetch_and_zero(&ctx->name)); -err_pid: - put_pid(fetch_and_zero(&ctx->pid)); -out: - return ret; -} - -int i915_gem_context_open(struct drm_i915_private *i915, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_gem_context *ctx; - int err; - - mutex_init(&file_priv->context_idr_lock); - mutex_init(&file_priv->vm_idr_lock); - - idr_init(&file_priv->context_idr); - idr_init_base(&file_priv->vm_idr, 1); - - mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err; - } - - err = gem_context_register(ctx, file_priv); - if (err < 0) - goto err_ctx; - - GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); - GEM_BUG_ON(err > 0); - - return 0; - -err_ctx: - mutex_lock(&i915->drm.struct_mutex); - context_close(ctx); - mutex_unlock(&i915->drm.struct_mutex); -err: - idr_destroy(&file_priv->vm_idr); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->vm_idr_lock); - mutex_destroy(&file_priv->context_idr_lock); - return err; -} - -void i915_gem_context_close(struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex); - - idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); - idr_destroy(&file_priv->context_idr); - mutex_destroy(&file_priv->context_idr_lock); - - idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, 
NULL); - idr_destroy(&file_priv->vm_idr); - mutex_destroy(&file_priv->vm_idr_lock); -} - -int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_vm_control *args = data; - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_hw_ppgtt *ppgtt; - int err; - - if (!HAS_FULL_PPGTT(i915)) - return -ENODEV; - - if (args->flags) - return -EINVAL; - - ppgtt = i915_ppgtt_create(i915); - if (IS_ERR(ppgtt)) - return PTR_ERR(ppgtt); - - ppgtt->vm.file = file_priv; - - if (args->extensions) { - err = i915_user_extensions(u64_to_user_ptr(args->extensions), - NULL, 0, - ppgtt); - if (err) - goto err_put; - } - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - goto err_put; - - err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - if (err < 0) - goto err_unlock; - - GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ - - mutex_unlock(&file_priv->vm_idr_lock); - - args->vm_id = err; - return 0; - -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); -err_put: - i915_ppgtt_put(ppgtt); - return err; -} - -int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_vm_control *args = data; - struct i915_hw_ppgtt *ppgtt; - int err; - u32 id; - - if (args->flags) - return -EINVAL; - - if (args->extensions) - return -EINVAL; - - id = args->vm_id; - if (!id) - return -ENOENT; - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - - ppgtt = idr_remove(&file_priv->vm_idr, id); - - mutex_unlock(&file_priv->vm_idr_lock); - if (!ppgtt) - return -ENOENT; - - i915_ppgtt_put(ppgtt); - return 0; -} - -struct context_barrier_task { - struct i915_active base; - void (*task)(void *data); - void *data; -}; - -static void cb_retire(struct i915_active *base) -{ - struct context_barrier_task *cb = container_of(base, typeof(*cb), base); - - if (cb->task) - cb->task(cb->data); - - i915_active_fini(&cb->base); - kfree(cb); -} - -I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault); -static int context_barrier_task(struct i915_gem_context *ctx, - intel_engine_mask_t engines, - int (*emit)(struct i915_request *rq, void *data), - void (*task)(void *data), - void *data) -{ - struct drm_i915_private *i915 = ctx->i915; - struct context_barrier_task *cb; - struct i915_gem_engines_iter it; - struct intel_context *ce; - int err = 0; - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!task); - - cb = kmalloc(sizeof(*cb), GFP_KERNEL); - if (!cb) - return -ENOMEM; - - i915_active_init(i915, &cb->base, cb_retire); - i915_active_acquire(&cb->base); - - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - struct i915_request *rq; - - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - ce->engine->mask)) { - err = -ENXIO; - break; - } - - if (!(ce->engine->mask & engines) || !ce->state) - continue; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } - - err = 0; - if (emit) - err = emit(rq, data); - if (err == 0) - err = i915_active_ref(&cb->base, rq->fence.context, rq); - - i915_request_add(rq); - if (err) - break; - } - i915_gem_context_unlock_engines(ctx); - - cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ - cb->data = data; - - i915_active_release(&cb->base); - - return err; -} - -static int get_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_hw_ppgtt *ppgtt; - int ret; - - if (!ctx->ppgtt) - return -ENODEV; - - /* XXX rcu acquire? */ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - - ppgtt = i915_ppgtt_get(ctx->ppgtt); - mutex_unlock(&ctx->i915->drm.struct_mutex); - - ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (ret) - goto err_put; - - ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; - - i915_ppgtt_get(ppgtt); - - args->size = 0; - args->value = ret; - - ret = 0; -err_unlock: - mutex_unlock(&file_priv->vm_idr_lock); -err_put: - i915_ppgtt_put(ppgtt); - return ret; -} - -static void set_ppgtt_barrier(void *data) -{ - struct i915_hw_ppgtt *old = data; - - if (INTEL_GEN(old->vm.i915) < 8) - gen6_ppgtt_unpin_all(old); - - i915_ppgtt_put(old); -} - -static int emit_ppgtt_update(struct i915_request *rq, void *data) -{ - struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; - struct intel_engine_cs *engine = rq->engine; - u32 base = engine->mmio_base; - u32 *cs; - int i; - - if (i915_vm_is_4lvl(&ppgtt->vm)) { - const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - *cs++ = lower_32_bits(pd_daddr); - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - } else { - /* ppGTT is not part of the legacy context image */ - gen6_ppgtt_pin(ppgtt); - } - - return 0; -} - -static int set_ppgtt(struct drm_i915_file_private *file_priv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_hw_ppgtt *ppgtt, *old; - int err; - - if (args->size) - return -EINVAL; - - if (!ctx->ppgtt) - return -ENODEV; - - if (upper_32_bits(args->value)) - return -ENOENT; - - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - - ppgtt = idr_find(&file_priv->vm_idr, args->value); - if (ppgtt) - i915_ppgtt_get(ppgtt); - mutex_unlock(&file_priv->vm_idr_lock); - if (!ppgtt) - return -ENOENT; - - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (err) - goto out; - - if (ppgtt == ctx->ppgtt) - goto unlock; - - /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - lut_close(ctx); - - old = __set_ppgtt(ctx, ppgtt); - - /* - * We need to flush any requests using the current ppgtt before - * we release it as the requests do not hold a reference themselves, - * only indirectly through the context. 
*/ - err = context_barrier_task(ctx, ALL_ENGINES, - emit_ppgtt_update, - set_ppgtt_barrier, - old); - if (err) { - ctx->ppgtt = old; - ctx->desc_template = default_desc_template(ctx->i915, old); - i915_ppgtt_put(ppgtt); - } - -unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - -out: - i915_ppgtt_put(ppgtt); - return err; -} - -static int gen8_emit_rpcs_config(struct i915_request *rq, - struct intel_context *ce, - struct intel_sseu sseu) -{ - u64 offset; - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - offset = i915_ggtt_offset(ce->state) + - LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); - - intel_ring_advance(rq, cs); - - return 0; -} - -static int -gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) -{ - struct i915_request *rq; - int ret; - - lockdep_assert_held(&ce->pin_mutex); - - /* - * If the context is not idle, we have to submit an ordered request to - * modify its context image via the kernel context (writing to our own - * image, or into the registers directly, does not stick). Pristine - * and idle contexts will be configured on pinning. - */ - if (!intel_context_is_pinned(ce)) - return 0; - - rq = i915_request_create(ce->engine->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity by this context. */ - ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); - if (ret) - goto out_add; - - ret = gen8_emit_rpcs_config(rq, ce, sseu); - if (ret) - goto out_add; - - /* - * Guarantee the context image and the timeline remain pinned until the - * modifying request is retired by setting the ce activity tracker. - * - * But we only need to take one pin on account of it. Or in other - * words, transfer the pinned ce object to the tracked active request. - */ - if (!i915_active_request_isset(&ce->active_tracker)) - __intel_context_pin(ce); - __i915_active_request_set(&ce->active_tracker, rq); - -out_add: - i915_request_add(rq); - return ret; -} - -static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) -{ - int ret; - - GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); - - ret = intel_context_lock_pinned(ce); - if (ret) - return ret; - - /* Nothing to do if unmodified. */ - if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) - goto unlock; - - ret = gen8_modify_rpcs(ce, sseu); - if (!ret) - ce->sseu = sseu; - -unlock: - intel_context_unlock_pinned(ce); - return ret; -} - -static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->gem_context->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int -user_to_context_sseu(struct drm_i915_private *i915, - const struct drm_i915_gem_context_param_sseu *user, - struct intel_sseu *context) -{ - const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu; - - /* No zeros in any field. */ - if (!user->slice_mask || !user->subslice_mask || - !user->min_eus_per_subslice || !user->max_eus_per_subslice) - return -EINVAL; - - /* Max > min. 
*/ - if (user->max_eus_per_subslice < user->min_eus_per_subslice) - return -EINVAL; - - /* - * Some future proofing on the types since the uAPI is wider than the - * current internal implementation. - */ - if (overflows_type(user->slice_mask, context->slice_mask) || - overflows_type(user->subslice_mask, context->subslice_mask) || - overflows_type(user->min_eus_per_subslice, - context->min_eus_per_subslice) || - overflows_type(user->max_eus_per_subslice, - context->max_eus_per_subslice)) - return -EINVAL; - - /* Check validity against hardware. */ - if (user->slice_mask & ~device->slice_mask) - return -EINVAL; - - if (user->subslice_mask & ~device->subslice_mask[0]) - return -EINVAL; - - if (user->max_eus_per_subslice > device->max_eus_per_subslice) - return -EINVAL; - - context->slice_mask = user->slice_mask; - context->subslice_mask = user->subslice_mask; - context->min_eus_per_subslice = user->min_eus_per_subslice; - context->max_eus_per_subslice = user->max_eus_per_subslice; - - /* Part specific restrictions. */ - if (IS_GEN(i915, 11)) { - unsigned int hw_s = hweight8(device->slice_mask); - unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]); - unsigned int req_s = hweight8(context->slice_mask); - unsigned int req_ss = hweight8(context->subslice_mask); - - /* - * Only full subslice enablement is possible if more than one - * slice is turned on. - */ - if (req_s > 1 && req_ss != hw_ss_per_s) - return -EINVAL; - - /* - * If more than four (SScount bitfield limit) subslices are - * requested then the number has to be even. - */ - if (req_ss > 4 && (req_ss & 1)) - return -EINVAL; - - /* - * If only one slice is enabled and subslice count is below the - * device full enablement, it must be at most half of all the - * available subslices. - */ - if (req_s == 1 && req_ss < hw_ss_per_s && - req_ss > (hw_ss_per_s / 2)) - return -EINVAL; - - /* ABI restriction - VME use case only. */ - - /* All slices or one slice only. */ - if (req_s != 1 && req_s != hw_s) - return -EINVAL; - - /* - * Half subslices or full enablement only when one slice is - * enabled. - */ - if (req_s == 1 && - (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2))) - return -EINVAL; - - /* No EU configuration changes. */ - if ((user->min_eus_per_subslice != - device->max_eus_per_subslice) || - (user->max_eus_per_subslice != - device->max_eus_per_subslice)) - return -EINVAL; - } - - return 0; -} - -static int set_sseu(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_context *ce; - struct intel_sseu sseu; - unsigned long lookup; - int ret; - - if (args->size < sizeof(user_sseu)) - return -EINVAL; - - if (!IS_GEN(i915, 11)) - return -ENODEV; - - if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), - sizeof(user_sseu))) - return -EFAULT; - - if (user_sseu.rsvd) - return -EINVAL; - - if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) - return -EINVAL; - - lookup = 0; - if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) - lookup |= LOOKUP_USER_INDEX; - - ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - /* Only the render engine supports RPCS configuration. 
*/ - if (ce->engine->class != RENDER_CLASS) { - ret = -ENODEV; - goto out_ce; - } - - ret = user_to_context_sseu(i915, &user_sseu, &sseu); - if (ret) - goto out_ce; - - ret = intel_context_reconfigure_sseu(ce, sseu); - if (ret) - goto out_ce; - - args->size = sizeof(user_sseu); - -out_ce: - intel_context_put(ce); - return ret; -} - -struct set_engines { - struct i915_gem_context *ctx; - struct i915_gem_engines *engines; -}; - -static int -set_engines__load_balance(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_load_balance __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct intel_engine_cs *stack[16]; - struct intel_engine_cs **siblings; - struct intel_context *ce; - u16 num_siblings, idx; - unsigned int n; - int err; - - if (!HAS_EXECLISTS(set->ctx->i915)) - return -ENODEV; - - if (USES_GUC_SUBMISSION(set->ctx->i915)) - return -ENODEV; /* not implemented yet */ - - if (get_user(idx, &ext->engine_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - DRM_DEBUG("Invalid placement value, %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (set->engines->engines[idx]) { - DRM_DEBUG("Invalid placement[%d], already occupied\n", idx); - return -EEXIST; - } - - if (get_user(num_siblings, &ext->num_siblings)) - return -EFAULT; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - err = check_user_mbz(&ext->mbz64); - if (err) - return err; - - siblings = stack; - if (num_siblings > ARRAY_SIZE(stack)) { - siblings = kmalloc_array(num_siblings, - sizeof(*siblings), - GFP_KERNEL); - if (!siblings) - return -ENOMEM; - } - - for (n = 0; n < num_siblings; n++) { - struct i915_engine_class_instance ci; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { - err = -EFAULT; - goto out_siblings; - } - - siblings[n] = intel_engine_lookup_user(set->ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!siblings[n]) { - DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n", - n, ci.engine_class, ci.engine_instance); - err = -EINVAL; - goto out_siblings; - } - } - - ce = intel_execlists_create_virtual(set->ctx, siblings, n); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_siblings; - } - - if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { - intel_context_put(ce); - err = -EEXIST; - goto out_siblings; - } - -out_siblings: - if (siblings != stack) - kfree(siblings); - - return err; -} - -static int -set_engines__bond(struct i915_user_extension __user *base, void *data) -{ - struct i915_context_engines_bond __user *ext = - container_of_user(base, typeof(*ext), base); - const struct set_engines *set = data; - struct i915_engine_class_instance ci; - struct intel_engine_cs *virtual; - struct intel_engine_cs *master; - u16 idx, num_bonds; - int err, n; - - if (get_user(idx, &ext->virtual_index)) - return -EFAULT; - - if (idx >= set->engines->num_engines) { - DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n", - idx, set->engines->num_engines); - return -EINVAL; - } - - idx = array_index_nospec(idx, set->engines->num_engines); - if (!set->engines->engines[idx]) { - DRM_DEBUG("Invalid engine at %d\n", idx); - return -EINVAL; - } - virtual = set->engines->engines[idx]->engine; - - err = check_user_mbz(&ext->flags); - if (err) - return err; - - for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { - err = check_user_mbz(&ext->mbz64[n]); - if (err) - return err; - } - - if (copy_from_user(&ci, 
&ext->master, sizeof(ci))) - return -EFAULT; - - master = intel_engine_lookup_user(set->ctx->i915, - ci.engine_class, ci.engine_instance); - if (!master) { - DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", - ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - if (get_user(num_bonds, &ext->num_bonds)) - return -EFAULT; - - for (n = 0; n < num_bonds; n++) { - struct intel_engine_cs *bond; - - if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) - return -EFAULT; - - bond = intel_engine_lookup_user(set->ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!bond) { - DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", - n, ci.engine_class, ci.engine_instance); - return -EINVAL; - } - - /* - * A non-virtual engine has no siblings to choose between; and - * a submit fence will always be directed to the one engine. - */ - if (intel_engine_is_virtual(virtual)) { - err = intel_virtual_engine_attach_bond(virtual, - master, - bond); - if (err) - return err; - } - } - - return 0; -} - -static const i915_user_extension_fn set_engines__extensions[] = { - [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, - [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, -}; - -static int -set_engines(struct i915_gem_context *ctx, - const struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user = - u64_to_user_ptr(args->value); - struct set_engines set = { .ctx = ctx }; - unsigned int num_engines, n; - u64 extensions; - int err; - - if (!args->size) { /* switch back to legacy user_ring_map */ - if (!i915_gem_context_user_engines(ctx)) - return 0; - - set.engines = default_engines(ctx); - if (IS_ERR(set.engines)) - return PTR_ERR(set.engines); - - goto replace; - } - - BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); - if (args->size < sizeof(*user) || - !IS_ALIGNED(args->size, sizeof(*user->engines))) { - DRM_DEBUG("Invalid size for engine array: %d\n", - args->size); - return -EINVAL; - } - - /* - * Note that I915_EXEC_RING_MASK limits execbuf to only using the - * first 64 engines defined here. 
- */ - num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); - - set.engines = kmalloc(struct_size(set.engines, engines, num_engines), - GFP_KERNEL); - if (!set.engines) - return -ENOMEM; - - set.engines->i915 = ctx->i915; - for (n = 0; n < num_engines; n++) { - struct i915_engine_class_instance ci; - struct intel_engine_cs *engine; - - if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { - __free_engines(set.engines, n); - return -EFAULT; - } - - if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && - ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { - set.engines->engines[n] = NULL; - continue; - } - - engine = intel_engine_lookup_user(ctx->i915, - ci.engine_class, - ci.engine_instance); - if (!engine) { - DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", - n, ci.engine_class, ci.engine_instance); - __free_engines(set.engines, n); - return -ENOENT; - } - - set.engines->engines[n] = intel_context_create(ctx, engine); - if (!set.engines->engines[n]) { - __free_engines(set.engines, n); - return -ENOMEM; - } - } - set.engines->num_engines = num_engines; - - err = -EFAULT; - if (!get_user(extensions, &user->extensions)) - err = i915_user_extensions(u64_to_user_ptr(extensions), - set_engines__extensions, - ARRAY_SIZE(set_engines__extensions), - &set); - if (err) { - free_engines(set.engines); - return err; - } - -replace: - mutex_lock(&ctx->engines_mutex); - if (args->size) - i915_gem_context_set_user_engines(ctx); - else - i915_gem_context_clear_user_engines(ctx); - rcu_swap_protected(ctx->engines, set.engines, 1); - mutex_unlock(&ctx->engines_mutex); - - INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); - queue_rcu_work(system_wq, &set.engines->rcu); - - return 0; -} - -static struct i915_gem_engines * -__copy_engines(struct i915_gem_engines *e) -{ - struct i915_gem_engines *copy; - unsigned int n; - - copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); - if (!copy) - return ERR_PTR(-ENOMEM); - - copy->i915 = e->i915; - for (n = 0; n < e->num_engines; n++) { - if (e->engines[n]) - copy->engines[n] = intel_context_get(e->engines[n]); - else - copy->engines[n] = NULL; - } - copy->num_engines = n; - - return copy; -} - -static int -get_engines(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct i915_context_param_engines __user *user; - struct i915_gem_engines *e; - size_t n, count, size; - int err = 0; - - err = mutex_lock_interruptible(&ctx->engines_mutex); - if (err) - return err; - - e = NULL; - if (i915_gem_context_user_engines(ctx)) - e = __copy_engines(i915_gem_context_engines(ctx)); - mutex_unlock(&ctx->engines_mutex); - if (IS_ERR_OR_NULL(e)) { - args->size = 0; - return PTR_ERR_OR_ZERO(e); - } - - count = e->num_engines; - - /* Be paranoid in case we have an impedance mismatch */ - if (!check_struct_size(user, engines, count, &size)) { - err = -EINVAL; - goto err_free; - } - if (overflows_type(size, args->size)) { - err = -EINVAL; - goto err_free; - } - - if (!args->size) { - args->size = size; - goto err_free; - } - - if (args->size < size) { - err = -EINVAL; - goto err_free; - } - - user = u64_to_user_ptr(args->value); - if (!access_ok(user, size)) { - err = -EFAULT; - goto err_free; - } - - if (put_user(0, &user->extensions)) { - err = -EFAULT; - goto err_free; - } - - for (n = 0; n < count; n++) { - struct i915_engine_class_instance ci = { - .engine_class = I915_ENGINE_CLASS_INVALID, - .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, - }; - - if (e->engines[n]) { - ci.engine_class = 
e->engines[n]->engine->uabi_class; - ci.engine_instance = e->engines[n]->engine->instance; - } - - if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { - err = -EFAULT; - goto err_free; - } - } - - args->size = size; - -err_free: - INIT_RCU_WORK(&e->rcu, free_engines_rcu); - queue_rcu_work(system_wq, &e->rcu); - return err; -} - -static int ctx_setparam(struct drm_i915_file_private *fpriv, - struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - int ret = 0; - - switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - if (args->size) - ret = -EINVAL; - else if (args->value) - set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - else - clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: - if (args->size) - ret = -EINVAL; - else if (args->value) - i915_gem_context_set_no_error_capture(ctx); - else - i915_gem_context_clear_no_error_capture(ctx); - break; - - case I915_CONTEXT_PARAM_BANNABLE: - if (args->size) - ret = -EINVAL; - else if (!capable(CAP_SYS_ADMIN) && !args->value) - ret = -EPERM; - else if (args->value) - i915_gem_context_set_bannable(ctx); - else - i915_gem_context_clear_bannable(ctx); - break; - - case I915_CONTEXT_PARAM_RECOVERABLE: - if (args->size) - ret = -EINVAL; - else if (args->value) - i915_gem_context_set_recoverable(ctx); - else - i915_gem_context_clear_recoverable(ctx); - break; - - case I915_CONTEXT_PARAM_PRIORITY: - { - s64 priority = args->value; - - if (args->size) - ret = -EINVAL; - else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) - ret = -ENODEV; - else if (priority > I915_CONTEXT_MAX_USER_PRIORITY || - priority < I915_CONTEXT_MIN_USER_PRIORITY) - ret = -EINVAL; - else if (priority > I915_CONTEXT_DEFAULT_PRIORITY && - !capable(CAP_SYS_NICE)) - ret = -EPERM; - else - ctx->sched.priority = - I915_USER_PRIORITY(priority); - } - break; - - case I915_CONTEXT_PARAM_SSEU: - ret = set_sseu(ctx, args); - break; - - case I915_CONTEXT_PARAM_VM: - ret = set_ppgtt(fpriv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = set_engines(ctx, args); - break; - - case I915_CONTEXT_PARAM_BAN_PERIOD: - default: - ret = -EINVAL; - break; - } - - return ret; -} - -struct create_ext { - struct i915_gem_context *ctx; - struct drm_i915_file_private *fpriv; -}; - -static int create_setparam(struct i915_user_extension __user *ext, void *data) -{ - struct drm_i915_gem_context_create_ext_setparam local; - const struct create_ext *arg = data; - - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; - - if (local.param.ctx_id) - return -EINVAL; - - return ctx_setparam(arg->fpriv, arg->ctx, &local.param); -} - -static int clone_engines(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - bool user_engines; - unsigned long n; - - clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); - if (!clone) - goto err_unlock; - - clone->i915 = dst->i915; - for (n = 0; n < e->num_engines; n++) { - struct intel_engine_cs *engine; - - if (!e->engines[n]) { - clone->engines[n] = NULL; - continue; - } - engine = e->engines[n]->engine; - - /* - * Virtual engines are singletons; they can only exist - * inside a single context, because they embed their - * HW context... As each virtual context implies a single - * timeline (each engine can only dequeue a single request - * at any time), it would be surprising for two contexts - * to use the same engine. 
So let's create a copy of - * the virtual engine instead. - */ - if (intel_engine_is_virtual(engine)) - clone->engines[n] = - intel_execlists_clone_virtual(dst, engine); - else - clone->engines[n] = intel_context_create(dst, engine); - if (IS_ERR_OR_NULL(clone->engines[n])) { - __free_engines(clone, n); - goto err_unlock; - } - } - clone->num_engines = n; - - user_engines = i915_gem_context_user_engines(src); - i915_gem_context_unlock_engines(src); - - free_engines(dst->engines); - RCU_INIT_POINTER(dst->engines, clone); - if (user_engines) - i915_gem_context_set_user_engines(dst); - else - i915_gem_context_clear_user_engines(dst); - return 0; - -err_unlock: - i915_gem_context_unlock_engines(src); - return -ENOMEM; -} - -static int clone_flags(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->user_flags = src->user_flags; - return 0; -} - -static int clone_schedattr(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - dst->sched = src->sched; - return 0; -} - -static int clone_sseu(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; - unsigned long n; - int err; - - clone = dst->engines; /* no locking required; sole access */ - if (e->num_engines != clone->num_engines) { - err = -EINVAL; - goto unlock; - } - - for (n = 0; n < e->num_engines; n++) { - struct intel_context *ce = e->engines[n]; - - if (clone->engines[n]->engine->class != ce->engine->class) { - /* Must have compatible engine maps! */ - err = -EINVAL; - goto unlock; - } - - /* serialises with set_sseu */ - err = intel_context_lock_pinned(ce); - if (err) - goto unlock; - - clone->engines[n]->sseu = ce->sseu; - intel_context_unlock_pinned(ce); - } - - err = 0; -unlock: - i915_gem_context_unlock_engines(src); - return err; -} - -static int clone_timeline(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - if (src->timeline) { - GEM_BUG_ON(src->timeline == dst->timeline); - - if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); - } - - return 0; -} - -static int clone_vm(struct i915_gem_context *dst, - struct i915_gem_context *src) -{ - struct i915_hw_ppgtt *ppgtt; - - rcu_read_lock(); - do { - ppgtt = READ_ONCE(src->ppgtt); - if (!ppgtt) - break; - - if (!kref_get_unless_zero(&ppgtt->ref)) - continue; - - /* - * This ppgtt may have been reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of src, - * we no longer care if it is released from src, as - * it cannot be reallocated elsewhere. 
- */ - - if (ppgtt == READ_ONCE(src->ppgtt)) - break; - - i915_ppgtt_put(ppgtt); - } while (1); - rcu_read_unlock(); - - if (ppgtt) { - __assign_ppgtt(dst, ppgtt); - i915_ppgtt_put(ppgtt); - } - - return 0; -} - -static int create_clone(struct i915_user_extension __user *ext, void *data) -{ - static int (* const fn[])(struct i915_gem_context *dst, - struct i915_gem_context *src) = { -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y - MAP(ENGINES, clone_engines), - MAP(FLAGS, clone_flags), - MAP(SCHEDATTR, clone_schedattr), - MAP(SSEU, clone_sseu), - MAP(TIMELINE, clone_timeline), - MAP(VM, clone_vm), -#undef MAP - }; - struct drm_i915_gem_context_create_ext_clone local; - const struct create_ext *arg = data; - struct i915_gem_context *dst = arg->ctx; - struct i915_gem_context *src; - int err, bit; - - if (copy_from_user(&local, ext, sizeof(local))) - return -EFAULT; - - BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != - I915_CONTEXT_CLONE_UNKNOWN); - - if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) - return -EINVAL; - - if (local.rsvd) - return -EINVAL; - - rcu_read_lock(); - src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); - rcu_read_unlock(); - if (!src) - return -ENOENT; - - GEM_BUG_ON(src == dst); - - for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { - if (!(local.flags & BIT(bit))) - continue; - - err = fn[bit](dst, src); - if (err) - return err; - } - - return 0; -} - -static const i915_user_extension_fn create_extensions[] = { - [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, - [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, -}; - -static bool client_is_banned(struct drm_i915_file_private *file_priv) -{ - return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; -} - -int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_private *i915 = to_i915(dev); - struct drm_i915_gem_context_create_ext *args = data; - struct create_ext ext_data; - int ret; - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return -ENODEV; - - if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN) - return -EINVAL; - - ret = i915_terminally_wedged(i915); - if (ret) - return ret; - - ext_data.fpriv = file->driver_priv; - if (client_is_banned(ext_data.fpriv)) { - DRM_DEBUG("client %s[%d] banned from creating ctx\n", - current->comm, - pid_nr(get_task_pid(current, PIDTYPE_PID))); - return -EIO; - } - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); - if (IS_ERR(ext_data.ctx)) - return PTR_ERR(ext_data.ctx); - - if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) { - ret = i915_user_extensions(u64_to_user_ptr(args->extensions), - create_extensions, - ARRAY_SIZE(create_extensions), - &ext_data); - if (ret) - goto err_ctx; - } - - ret = gem_context_register(ext_data.ctx, ext_data.fpriv); - if (ret < 0) - goto err_ctx; - - args->ctx_id = ret; - DRM_DEBUG("HW context %d created\n", args->ctx_id); - - return 0; - -err_ctx: - mutex_lock(&dev->struct_mutex); - context_close(ext_data.ctx); - mutex_unlock(&dev->struct_mutex); - return ret; -} - -int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_context_destroy *args = data; - struct drm_i915_file_private *file_priv = file->driver_priv; - struct i915_gem_context *ctx; - - if (args->pad != 0) - return -EINVAL; - - if (!args->ctx_id) - return -ENOENT; - - if 
(mutex_lock_interruptible(&file_priv->context_idr_lock)) - return -EINTR; - - ctx = idr_remove(&file_priv->context_idr, args->ctx_id); - mutex_unlock(&file_priv->context_idr_lock); - if (!ctx) - return -ENOENT; - - mutex_lock(&dev->struct_mutex); - context_close(ctx); - mutex_unlock(&dev->struct_mutex); - - return 0; -} - -static int get_sseu(struct i915_gem_context *ctx, - struct drm_i915_gem_context_param *args) -{ - struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_context *ce; - unsigned long lookup; - int err; - - if (args->size == 0) - goto out; - else if (args->size < sizeof(user_sseu)) - return -EINVAL; - - if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value), - sizeof(user_sseu))) - return -EFAULT; - - if (user_sseu.rsvd) - return -EINVAL; - - if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) - return -EINVAL; - - lookup = 0; - if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) - lookup |= LOOKUP_USER_INDEX; - - ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = intel_context_lock_pinned(ce); /* serialises with set_sseu */ - if (err) { - intel_context_put(ce); - return err; - } - - user_sseu.slice_mask = ce->sseu.slice_mask; - user_sseu.subslice_mask = ce->sseu.subslice_mask; - user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; - user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; - - intel_context_unlock_pinned(ce); - intel_context_put(ce); - - if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, - sizeof(user_sseu))) - return -EFAULT; - -out: - args->size = sizeof(user_sseu); - - return 0; -} - -int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_context_param *args = data; - struct i915_gem_context *ctx; - int ret = 0; - - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - switch (args->param) { - case I915_CONTEXT_PARAM_NO_ZEROMAP: - args->size = 0; - args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags); - break; - - case I915_CONTEXT_PARAM_GTT_SIZE: - args->size = 0; - if (ctx->ppgtt) - args->value = ctx->ppgtt->vm.total; - else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; - else - args->value = to_i915(dev)->ggtt.vm.total; - break; - - case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: - args->size = 0; - args->value = i915_gem_context_no_error_capture(ctx); - break; - - case I915_CONTEXT_PARAM_BANNABLE: - args->size = 0; - args->value = i915_gem_context_is_bannable(ctx); - break; - - case I915_CONTEXT_PARAM_RECOVERABLE: - args->size = 0; - args->value = i915_gem_context_is_recoverable(ctx); - break; - - case I915_CONTEXT_PARAM_PRIORITY: - args->size = 0; - args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; - break; - - case I915_CONTEXT_PARAM_SSEU: - ret = get_sseu(ctx, args); - break; - - case I915_CONTEXT_PARAM_VM: - ret = get_ppgtt(file_priv, ctx, args); - break; - - case I915_CONTEXT_PARAM_ENGINES: - ret = get_engines(ctx, args); - break; - - case I915_CONTEXT_PARAM_BAN_PERIOD: - default: - ret = -EINVAL; - break; - } - - i915_gem_context_put(ctx); - return ret; -} - -int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_gem_context_param *args = data; - struct i915_gem_context *ctx; - 
int ret; - - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - ret = ctx_setparam(file_priv, ctx, args); - - i915_gem_context_put(ctx); - return ret; -} - -int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, - void *data, struct drm_file *file) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_reset_stats *args = data; - struct i915_gem_context *ctx; - int ret; - - if (args->flags || args->pad) - return -EINVAL; - - ret = -ENOENT; - rcu_read_lock(); - ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id); - if (!ctx) - goto out; - - /* - * We opt for unserialised reads here. This may result in tearing - * in the extremely unlikely event of a GPU hang on this context - * as we are querying them. If we need that extra layer of protection, - * we should wrap the hangstats with a seqlock. - */ - - if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); - else - args->reset_count = 0; - - args->batch_active = atomic_read(&ctx->guilty_count); - args->batch_pending = atomic_read(&ctx->active_count); - - ret = 0; -out: - rcu_read_unlock(); - return ret; -} - -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - -/* GEM context-engines iterator: for_each_gem_engine() */ -struct intel_context * -i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) -{ - const struct i915_gem_engines *e = it->engines; - struct intel_context *ctx; - - do { - if (it->idx >= e->num_engines) - return NULL; - - ctx = e->engines[it->idx++]; - } while (!ctx); - - return ctx; -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_context.c" -#include "selftests/i915_gem_context.c" -#endif - -static void i915_global_gem_context_shrink(void) -{ - kmem_cache_shrink(global.slab_luts); -} - -static void i915_global_gem_context_exit(void) -{ - kmem_cache_destroy(global.slab_luts); -} - -static struct i915_global_gem_context global = { { - .shrink = i915_global_gem_context_shrink, - .exit = i915_global_gem_context_exit, -} }; - -int __init i915_global_gem_context_init(void) -{ - global.slab_luts = KMEM_CACHE(i915_lut_handle, 0); - if (!global.slab_luts) - return -ENOMEM; - - i915_global_register(&global.base); - return 0; -} diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h deleted file mode 100644 index 9ad4a6362438..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following 
conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __I915_GEM_CONTEXT_H__ -#define __I915_GEM_CONTEXT_H__ - -#include "i915_gem_context_types.h" - -#include "gt/intel_context.h" - -#include "i915_gem.h" -#include "i915_scheduler.h" -#include "intel_device_info.h" - -struct drm_device; -struct drm_file; - -static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_CLOSED, &ctx->flags); -} - -static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - set_bit(CONTEXT_CLOSED, &ctx->flags); -} - -static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx) -{ - return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx) -{ - set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx) -{ - clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags); -} - -static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_BANNED, &ctx->flags); -} - -static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx) -{ - __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); -} - -static inline bool -i915_gem_context_user_engines(const struct i915_gem_context *ctx) -{ - return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -static inline void -i915_gem_context_set_user_engines(struct i915_gem_context *ctx) -{ - set_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -static inline void -i915_gem_context_clear_user_engines(struct 
i915_gem_context *ctx) -{ - clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); -} - -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - -static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) -{ - return !ctx->file_priv; -} - -/* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); - -int i915_gem_context_open(struct drm_i915_private *i915, - struct drm_file *file); -void i915_gem_context_close(struct drm_file *file); - -void i915_gem_context_release(struct kref *ctx_ref); -struct i915_gem_context * -i915_gem_context_create_gvt(struct drm_device *dev); - -int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - -int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); -int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, - struct drm_file *file); - -struct i915_gem_context * -i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); - -static inline struct i915_gem_context * -i915_gem_context_get(struct i915_gem_context *ctx) -{ - kref_get(&ctx->ref); - return ctx; -} - -static inline void i915_gem_context_put(struct i915_gem_context *ctx) -{ - kref_put(&ctx->ref, i915_gem_context_release); -} - -static inline struct i915_gem_engines * -i915_gem_context_engines(struct i915_gem_context *ctx) -{ - return rcu_dereference_protected(ctx->engines, - lockdep_is_held(&ctx->engines_mutex)); -} - -static inline struct i915_gem_engines * -i915_gem_context_lock_engines(struct i915_gem_context *ctx) - __acquires(&ctx->engines_mutex) -{ - mutex_lock(&ctx->engines_mutex); - return i915_gem_context_engines(ctx); -} - -static inline void -i915_gem_context_unlock_engines(struct i915_gem_context *ctx) - __releases(&ctx->engines_mutex) -{ - mutex_unlock(&ctx->engines_mutex); -} - -static inline struct intel_context * -i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - return i915_gem_context_engines(ctx)->engines[idx]; -} - -static inline struct intel_context * -i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) -{ - struct intel_context *ce = ERR_PTR(-EINVAL); - - rcu_read_lock(); { - struct i915_gem_engines *e = rcu_dereference(ctx->engines); - if (likely(idx < e->num_engines && e->engines[idx])) - ce = intel_context_get(e->engines[idx]); - } rcu_read_unlock(); - - return ce; -} - -static inline void -i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, - struct i915_gem_engines *engines) -{ - GEM_BUG_ON(!engines); - it->engines = engines; - it->idx = 0; -} - -struct 
intel_context * -i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); - -#define for_each_gem_engine(ce, engines, it) \ - for (i915_gem_engines_iter_init(&(it), (engines)); \ - ((ce) = i915_gem_engines_iter_next(&(it)));) - -struct i915_lut_handle *i915_lut_handle_alloc(void); -void i915_lut_handle_free(struct i915_lut_handle *lut); - -#endif /* !__I915_GEM_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h deleted file mode 100644 index fb965ded2508..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_CONTEXT_TYPES_H__ -#define __I915_GEM_CONTEXT_TYPES_H__ - -#include <linux/atomic.h> -#include <linux/list.h> -#include <linux/llist.h> -#include <linux/kref.h> -#include <linux/mutex.h> -#include <linux/radix-tree.h> -#include <linux/rbtree.h> -#include <linux/rcupdate.h> -#include <linux/types.h> - -#include "gt/intel_context_types.h" - -#include "i915_scheduler.h" - -struct pid; - -struct drm_i915_private; -struct drm_i915_file_private; -struct i915_hw_ppgtt; -struct i915_timeline; -struct intel_ring; - -struct i915_gem_engines { - struct rcu_work rcu; - struct drm_i915_private *i915; - unsigned int num_engines; - struct intel_context *engines[]; -}; - -struct i915_gem_engines_iter { - unsigned int idx; - const struct i915_gem_engines *engines; -}; - -/** - * struct i915_gem_context - client state - * - * The struct i915_gem_context represents the combined view of the driver and - * logical hardware state for a particular client. - */ -struct i915_gem_context { - /** i915: i915 device backpointer */ - struct drm_i915_private *i915; - - /** file_priv: owning file descriptor */ - struct drm_i915_file_private *file_priv; - - /** - * @engines: User defined engines for this context - * - * Various uAPI offer the ability to look up an - * index from this array to select an engine to operate on. - * - * Multiple logically distinct instances of the same engine - * may be defined in the array, as well as composite virtual - * engines. - * - * Execbuf uses the I915_EXEC_RING_MASK as an index into this - * array to select which HW context + engine to execute on. For - * the default array, the user_ring_map[] is used to translate - * the legacy uABI onto the appropriate index (e.g. both - * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same - * context, and I915_EXEC_BSD is weird). For a user defined - * array, execbuf uses I915_EXEC_RING_MASK as a plain index. - * - * User defined by I915_CONTEXT_PARAM_ENGINE (when the - * CONTEXT_USER_ENGINES flag is set). - */ - struct i915_gem_engines __rcu *engines; - struct mutex engines_mutex; /* guards writes to engines */ - - struct i915_timeline *timeline; - - /** - * @ppgtt: unique address space (GTT) - * - * In full-ppgtt mode, each context has its own address space ensuring - * complete separation of one client from all others. - * - * In other modes, this is a NULL pointer with the expectation that - * the caller uses the shared global GTT. - */ - struct i915_hw_ppgtt *ppgtt; - - /** - * @pid: process id of creator - * - * Note that who created the context may not be the principal user, - * as the context may be shared across a local socket. However, - * that should only affect the default context, all contexts created - * explicitly by the client are expected to be isolated. 
- */ - struct pid *pid; - - /** - * @name: arbitrary name - * - * A name is constructed for the context from the creator's process - * name, pid and user handle in order to uniquely identify the - * context in messages. - */ - const char *name; - - /** link: place within &drm_i915_private.context_list */ - struct list_head link; - struct llist_node free_link; - - /** - * @ref: reference count - * - * A reference to a context is held by both the client who created it - * and on each request submitted to the hardware using the request - * (to ensure the hardware has access to the state until it has - * finished all pending writes). See i915_gem_context_get() and - * i915_gem_context_put() for access. - */ - struct kref ref; - - /** - * @rcu: rcu_head for deferred freeing. - */ - struct rcu_head rcu; - - /** - * @user_flags: small set of booleans controlled by the user - */ - unsigned long user_flags; -#define UCONTEXT_NO_ZEROMAP 0 -#define UCONTEXT_NO_ERROR_CAPTURE 1 -#define UCONTEXT_BANNABLE 2 -#define UCONTEXT_RECOVERABLE 3 - - /** - * @flags: small set of booleans - */ - unsigned long flags; -#define CONTEXT_BANNED 0 -#define CONTEXT_CLOSED 1 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 2 -#define CONTEXT_USER_ENGINES 3 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unique - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context has been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - - struct mutex mutex; - - struct i915_sched_attr sched; - - /** ring_size: size for allocating the per-engine ring buffer */ - u32 ring_size; - /** desc_template: invariant fields for the HW context descriptor */ - u32 desc_template; - - /** guilty_count: How many times this context has caused a GPU hang. */ - atomic_t guilty_count; - /** - * @active_count: How many times this context was active during a GPU - * hang, but did not cause it. - */ - atomic_t active_count; - - /** - * @hang_timestamp: The last time(s) this context caused a GPU hang - */ - unsigned long hang_timestamp[2]; -#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */ - - /** remap_slice: Bitmask of cache lines that need remapping */ - u8 remap_slice; - - /** handles_vma: radix tree to look up our context specific obj/vma for - * the user handle. (user handles are per fd, but the binding is - * per vm, which may be one per context or shared with the global GTT) - */ - struct radix_tree_root handles_vma; - - /** handles_list: reverse list of all the radix tree entries in use for - * this context, which allows us to free all the allocations on - * context close. 
- */ - struct list_head handles_list; -}; - -#endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c deleted file mode 100644 index 5a101a9462d8..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Dave Airlie <airlied@redhat.com> - */ - -#include <linux/dma-buf.h> -#include <linux/reservation.h> - - -#include "i915_drv.h" - -static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) -{ - return to_intel_bo(buf->priv); -} - -static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, - enum dma_data_direction dir) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - struct sg_table *st; - struct scatterlist *src, *dst; - int ret, i; - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - - /* Copy sg so that we make an independent mapping */ - st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (st == NULL) { - ret = -ENOMEM; - goto err_unpin_pages; - } - - ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); - if (ret) - goto err_free; - - src = obj->mm.pages->sgl; - dst = st->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { - sg_set_page(dst, sg_page(src), src->length, 0); - dst = sg_next(dst); - src = sg_next(src); - } - - if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { - ret = -ENOMEM; - goto err_free_sg; - } - - return st; - -err_free_sg: - sg_free_table(st); -err_free: - kfree(st); -err_unpin_pages: - i915_gem_object_unpin_pages(obj); -err: - return ERR_PTR(ret); -} - -static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, - struct sg_table *sg, - enum dma_data_direction dir) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - - dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir); - sg_free_table(sg); - kfree(sg); - - i915_gem_object_unpin_pages(obj); -} - -static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - return i915_gem_object_pin_map(obj, I915_MAP_WB); -} - -static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); -} - -static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long 
page_num) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct page *page; - - if (page_num >= obj->base.size >> PAGE_SHIFT) - return NULL; - - if (!i915_gem_object_has_struct_page(obj)) - return NULL; - - if (i915_gem_object_pin_pages(obj)) - return NULL; - - /* Synchronisation is left to the caller (via .begin_cpu_access()) */ - page = i915_gem_object_get_page(obj, page_num); - if (IS_ERR(page)) - goto err_unpin; - - return kmap(page); - -err_unpin: - i915_gem_object_unpin_pages(obj); - return NULL; -} - -static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - - kunmap(virt_to_page(addr)); - i915_gem_object_unpin_pages(obj); -} - -static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - int ret; - - if (obj->base.size < vma->vm_end - vma->vm_start) - return -EINVAL; - - if (!obj->base.filp) - return -ENODEV; - - ret = call_mmap(obj->base.filp, vma); - if (ret) - return ret; - - fput(vma->vm_file); - vma->vm_file = get_file(obj->base.filp); - - return 0; -} - -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); - int err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto out; - - err = i915_gem_object_set_to_cpu_domain(obj, write); - mutex_unlock(&dev->struct_mutex); - -out: - i915_gem_object_unpin_pages(obj); - return err; -} - -static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) -{ - struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - int err; - - err = i915_gem_object_pin_pages(obj); - if (err) - return err; - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto out; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - mutex_unlock(&dev->struct_mutex); - -out: - i915_gem_object_unpin_pages(obj); - return err; -} - -static const struct dma_buf_ops i915_dmabuf_ops = { - .map_dma_buf = i915_gem_map_dma_buf, - .unmap_dma_buf = i915_gem_unmap_dma_buf, - .release = drm_gem_dmabuf_release, - .map = i915_gem_dmabuf_kmap, - .unmap = i915_gem_dmabuf_kunmap, - .mmap = i915_gem_dmabuf_mmap, - .vmap = i915_gem_dmabuf_vmap, - .vunmap = i915_gem_dmabuf_vunmap, - .begin_cpu_access = i915_gem_begin_cpu_access, - .end_cpu_access = i915_gem_end_cpu_access, -}; - -struct dma_buf *i915_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gem_obj, int flags) -{ - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &i915_dmabuf_ops; - exp_info.size = gem_obj->size; - exp_info.flags = flags; - exp_info.priv = gem_obj; - exp_info.resv = obj->resv; - - if (obj->ops->dmabuf_export) { - int ret = obj->ops->dmabuf_export(obj); - if (ret) - return ERR_PTR(ret); - } - - return drm_gem_dmabuf_export(dev, &exp_info); -} - -static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) -{ - struct sg_table *pages; - unsigned int sg_page_sizes; - - pages = dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - sg_page_sizes = 
i915_sg_page_sizes(pages->sgl); - - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); - - return 0; -} - -static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - dma_buf_unmap_attachment(obj->base.import_attach, pages, - DMA_BIDIRECTIONAL); -} - -static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { - .get_pages = i915_gem_object_get_pages_dmabuf, - .put_pages = i915_gem_object_put_pages_dmabuf, -}; - -struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf) -{ - struct dma_buf_attachment *attach; - struct drm_i915_gem_object *obj; - int ret; - - /* is this one of our own objects? */ - if (dma_buf->ops == &i915_dmabuf_ops) { - obj = dma_buf_to_obj(dma_buf); - /* is it from our device? */ - if (obj->base.dev == dev) { - /* - * Importing dmabuf exported from our own gem increases - * refcount on gem itself instead of f_count of dmabuf. - */ - return &i915_gem_object_get(obj)->base; - } - } - - /* need to attach */ - attach = dma_buf_attach(dma_buf, dev->dev); - if (IS_ERR(attach)) - return ERR_CAST(attach); - - get_dma_buf(dma_buf); - - obj = i915_gem_object_alloc(); - if (obj == NULL) { - ret = -ENOMEM; - goto fail_detach; - } - - drm_gem_private_object_init(dev, &obj->base, dma_buf->size); - i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); - obj->base.import_attach = attach; - obj->resv = dma_buf->resv; - - /* We use GTT as shorthand for a coherent domain, one that is - * neither in the GPU cache nor in the CPU cache, where all - * writes are immediately visible in memory. (That's not strictly - * true, but it's close! There are internal buffers such as the - * write-combined buffer or a delay through the chipset for GTT - * writes that do require us to treat GTT as a separate cache domain.) - */ - obj->read_domains = I915_GEM_DOMAIN_GTT; - obj->write_domain = 0; - - return &obj->base; - -fail_detach: - dma_buf_detach(dma_buf, attach); - dma_buf_put(dma_buf); - - return ERR_PTR(ret); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_dmabuf.c" -#include "selftests/i915_gem_dmabuf.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0bdb3e072ba5..a5783c4cb98b 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -28,6 +28,8 @@ #include <drm/i915_drm.h> +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "intel_drv.h" #include "i915_trace.h" diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c deleted file mode 100644 index 699f3f180d8a..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ /dev/null @@ -1,2788 +0,0 @@ -/* - * Copyright © 2008,2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * Chris Wilson - * - */ - -#include <linux/intel-iommu.h> -#include <linux/reservation.h> -#include <linux/sync_file.h> -#include <linux/uaccess.h> - -#include <drm/drm_syncobj.h> -#include <drm/i915_drm.h> - -#include "gem/i915_gem_ioctls.h" -#include "gt/intel_gt_pm.h" - -#include "i915_drv.h" -#include "i915_gem_clflush.h" -#include "i915_trace.h" -#include "intel_drv.h" -#include "intel_frontbuffer.h" - -enum { - FORCE_CPU_RELOC = 1, - FORCE_GTT_RELOC, - FORCE_GPU_RELOC, -#define DBG_FORCE_RELOC 0 /* choose one of the above! */ -}; - -#define __EXEC_OBJECT_HAS_REF BIT(31) -#define __EXEC_OBJECT_HAS_PIN BIT(30) -#define __EXEC_OBJECT_HAS_FENCE BIT(29) -#define __EXEC_OBJECT_NEEDS_MAP BIT(28) -#define __EXEC_OBJECT_NEEDS_BIAS BIT(27) -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */ -#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) - -#define __EXEC_HAS_RELOC BIT(31) -#define __EXEC_VALIDATED BIT(30) -#define __EXEC_INTERNAL_FLAGS (~0u << 30) -#define UPDATE PIN_OFFSET_FIXED - -#define BATCH_OFFSET_BIAS (256*1024) - -#define __I915_EXEC_ILLEGAL_FLAGS \ - (__I915_EXEC_UNKNOWN_FLAGS | \ - I915_EXEC_CONSTANTS_MASK | \ - I915_EXEC_RESOURCE_STREAMER) - -/* Catch emission of unexpected errors for CI! */ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) -#undef EINVAL -#define EINVAL ({ \ - DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \ - 22; \ -}) -#endif - -/** - * DOC: User command execution - * - * Userspace submits commands to be executed on the GPU as an instruction - * stream within a GEM object we call a batchbuffer. These instructions may - * refer to other GEM objects containing auxiliary state such as kernels, - * samplers, render targets and even secondary batchbuffers. Userspace does - * not know where in the GPU memory these objects reside and so before the - * batchbuffer is passed to the GPU for execution, those addresses in the - * batchbuffer and auxiliary objects are updated. This is known as relocation, - * or patching. To try and avoid having to relocate each object on the next - * execution, userspace is told the location of those objects in this pass, - * but this remains just a hint as the kernel may choose a new location for - * any object in the future. - * - * At the level of talking to the hardware, submitting a batchbuffer for the - * GPU to execute is to add content to a buffer from which the HW - * command streamer is reading. - * - * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e. - * Execlists, this command is not placed on the same buffer as the - * remaining items. - * - * 2. Add a command to invalidate caches to the buffer. - * - * 3. Add a batchbuffer start command to the buffer; the start command is - * essentially a token together with the GPU address of the batchbuffer - * to be executed. - * - * 4. Add a pipeline flush to the buffer. - * - * 5. Add a memory write command to the buffer to record when the GPU - * is done executing the batchbuffer.
The memory write writes the - * global sequence number of the request, ``i915_request::global_seqno``; - * the i915 driver uses the current value in the register to determine - * if the GPU has completed the batchbuffer. - * - * 6. Add a user interrupt command to the buffer. This command instructs - * the GPU to issue an interrupt when the command, pipeline flush and - * memory write are completed. - * - * 7. Inform the hardware of the additional commands added to the buffer - * (by updating the tail pointer). - * - * Processing an execbuf ioctl is conceptually split up into a few phases. - * - * 1. Validation - Ensure all the pointers, handles and flags are valid. - * 2. Reservation - Assign GPU address space for every object - * 3. Relocation - Update any addresses to point to the final locations - * 4. Serialisation - Order the request with respect to its dependencies - * 5. Construction - Construct a request to execute the batchbuffer - * 6. Submission (at some point in the future execution) - * - * Reserving resources for the execbuf is the most complicated phase. We - * neither want to have to migrate the object in the address space, nor do - * we want to have to update any relocations pointing to this object. Ideally, - * we want to leave the object where it is and for all the existing relocations - * to match. If the object is given a new address, or if userspace thinks the - * object is elsewhere, we have to parse all the relocation entries and update - * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that - * all the target addresses in all of its objects match the value in the - * relocation entries and that they all match the presumed offsets given by the - * list of execbuffer objects. Using this knowledge, we know that if we haven't - * moved any buffers, all the relocation entries are valid and we can skip - * the update. (If userspace is wrong, the likely outcome is an impromptu GPU - * hang.) The requirements for using I915_EXEC_NO_RELOC are: - * - * The addresses written in the objects must match the corresponding - * reloc.presumed_offset which in turn must match the corresponding - * execobject.offset. - * - * Any render targets written to in the batch must be flagged with - * EXEC_OBJECT_WRITE. - * - * To avoid stalling, execobject.offset should match the current - * address of that object within the active context. - * - * The reservation is done in multiple phases. First we try and keep any - * object already bound in its current location - so long as it meets the - * constraints imposed by the new execbuffer. Any object left unbound after the - * first pass is then fitted into any available idle space. If an object does - * not fit, all objects are removed from the reservation and the process rerun - * after sorting the objects into a priority order (more difficult to fit - * objects are tried first). Failing that, the entire VM is cleared and we try - * to fit the execbuf one last time before concluding that it simply will not - * fit. - * - * A small complication to all of this is that we allow userspace not only to - * specify an alignment and a size for the object in the address space, but - * we also allow userspace to specify the exact offset. These objects are - * simpler to place (the location is known a priori); all we have to do is make - * sure the space is available.
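- *
- * (Editor's illustrative sketch, not part of the original file, with
- * hypothetical handles and offsets: a userspace submission honouring
- * the NO_RELOC contract might look like
- *
- *	struct drm_i915_gem_exec_object2 exec[2] = {
- *		/* render target, written by the batch */
- *		{ .handle = rt, .offset = rt_presumed,
- *		  .flags = EXEC_OBJECT_WRITE },
- *		/* batchbuffer last, unless I915_EXEC_BATCH_FIRST */
- *		{ .handle = batch, .offset = batch_presumed },
- *	};
- *	struct drm_i915_gem_execbuffer2 args = {
- *		.buffers_ptr = (uintptr_t)exec,
- *		.buffer_count = 2,
- *		.batch_len = batch_len,
- *		.flags = I915_EXEC_NO_RELOC | I915_EXEC_RENDER,
- *	};
- *	drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &args);
- *
- * where every .offset matches the reloc.presumed_offset entries and
- * the addresses already written inside the objects, as required
- * above.)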
- * - * Once all the objects are in place, patching up the buried pointers to point - * to the final locations is a fairly simple job of walking over the relocation - * entry arrays, looking up the right address and rewriting the value into - * the object. Simple! ... The relocation entries are stored in user memory - * and so to access them we have to copy them into a local buffer. That copy - * has to avoid taking any pagefaults as they may lead back to a GEM object - * requiring the struct_mutex (i.e. recursive deadlock). So once again we split - * the relocation into multiple passes. First we try to do everything within an - * atomic context (avoid the pagefaults) which requires that we never wait. If - * we detect that we may wait, or if we need to fault, then we have to fall back - * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm - * bells yet?) Dropping the mutex means that we lose all the state we have - * built up so far for the execbuf and we must reset any global data. However, - * we do leave the objects pinned in their final locations - which is a - * potential issue for concurrent execbufs. Once we have left the mutex, we can - * allocate and copy all the relocation entries into a large array at our - * leisure, reacquire the mutex, reclaim all the objects and other state and - * then proceed to update any incorrect addresses with the objects. - * - * As we process the relocation entries, we maintain a record of whether the - * object is being written to. Using NO_RELOC, we expect userspace to provide - * this information instead. We also check whether we can skip the relocation - * by comparing the expected value inside the relocation entry with the target's - * final address. If they differ, we have to map the current object and rewrite - * the 4 or 8 byte pointer within. - * - * Serialising an execbuf is quite simple according to the rules of the GEM - * ABI. Execution within each context is ordered by the order of submission. - * Writes to any GEM object are in order of submission and are exclusive. Reads - * from a GEM object are unordered with respect to other reads, but ordered by - * writes. A write submitted after a read cannot occur before the read, and - * similarly any read submitted after a write cannot occur before the write. - * Writes are ordered between engines such that only one write occurs at any - * time (completing any reads beforehand) - using semaphores where available - * and CPU serialisation otherwise. Other GEM accesses obey the same rules; any - * write (either via mmaps using set-domain, or via pwrite) must flush all GPU - * reads before starting, and any read (either using set-domain or pread) must - * flush all GPU writes before starting. (Note we only employ a barrier before, - * we currently rely on userspace not concurrently starting a new execution - * whilst reading or writing to an object. This may be an advantage or not - * depending on how much you trust userspace not to shoot themselves in the - * foot.) Serialisation may just result in the request being inserted into - * a DAG awaiting its turn, but the simplest is to wait on the CPU until - * all dependencies are resolved. - * - * After all of that, it is just a matter of closing the request and handing it - * to the hardware (well, leaving it in a queue to be executed). However, we - * also offer the ability for batchbuffers to be run with elevated privileges so - * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
- * Before any batch is given extra privileges we first must check that it - * contains no nefarious instructions: we check that each instruction is from - * our whitelist and that all registers are also from an allowed list. We first - * copy the user's batchbuffer to a shadow (so that the user doesn't have - * access to it, either by the CPU or GPU as we scan it) and then parse each - * instruction. If everything is ok, we set a flag telling the hardware to run - * the batchbuffer in trusted mode, otherwise the ioctl is rejected. - */ - -struct i915_execbuffer { - struct drm_i915_private *i915; /** i915 backpointer */ - struct drm_file *file; /** per-file lookup tables and limits */ - struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ - struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ - struct i915_vma **vma; - unsigned int *flags; - - struct intel_engine_cs *engine; /** engine to queue the request to */ - struct intel_context *context; /* logical state for the request */ - struct i915_gem_context *gem_context; /** caller's context */ - struct i915_address_space *vm; /** GTT and vma for the request */ - - struct i915_request *request; /** our request to build */ - struct i915_vma *batch; /** identity of the batch obj/vma */ - - /** actual size of execobj[] as we may extend it for the cmdparser */ - unsigned int buffer_count; - - /** list of vma not yet bound during reservation phase */ - struct list_head unbound; - - /** list of vma that have execobj.relocation_count */ - struct list_head relocs; - - /** - * Track the most recently used object for relocations, as we - * frequently have to perform multiple relocations within the same - * obj/page - */ - struct reloc_cache { - struct drm_mm_node node; /** temporary GTT binding */ - unsigned long vaddr; /** Current kmap address */ - unsigned long page; /** Currently mapped page index */ - unsigned int gen; /** Cached value of INTEL_GEN */ - bool use_64bit_reloc : 1; - bool has_llc : 1; - bool has_fence : 1; - bool needs_unfenced : 1; - - struct i915_request *rq; - u32 *rq_cmd; - unsigned int rq_size; - } reloc_cache; - - u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u32 context_flags; /** Set of execobj.flags to insert from the ctx */ - - u32 batch_start_offset; /** Location within object of batch */ - u32 batch_len; /** Length of batch within object */ - u32 batch_flags; /** Flags composed for emit_bb_start() */ - - /** - * Indicate either the size of the hashtable used to resolve - * relocation handles, or if negative that we are using a direct - * index into the execobj[]. - */ - int lut_size; - struct hlist_head *buckets; /** ht for relocation handles */ -}; - -#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) - -/* - * Used to convert any address to canonical form. - * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, - * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the - * addresses to be in a canonical form: - * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct - * canonical form [63:48] == [47]."
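 *
 * (Editor's worked example, an illustration rather than PRM text: an
 * object placed at 0x0000_8000_0000_0000 has bit 47 set, so the
 * canonical form the hardware expects is sign_extend64(addr, 47) ==
 * 0xffff_8000_0000_0000; gen8_noncanonical_addr() below masks
 * bits 63:48 off again before handing the address back to drm_mm.)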
- */ -#define GEN8_HIGH_ADDRESS_BIT 47 -static inline u64 gen8_canonical_addr(u64 address) -{ - return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); -} - -static inline u64 gen8_noncanonical_addr(u64 address) -{ - return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); -} - -static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) -{ - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; -} - -static int eb_create(struct i915_execbuffer *eb) -{ - if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { - unsigned int size = 1 + ilog2(eb->buffer_count); - - /* - * Without a 1:1 association between relocation handles and - * the execobject[] index, we instead create a hashtable. - * We size it dynamically based on available memory, starting - * first with 1:1 associative hash and scaling back until - * the allocation succeeds. - * - * Later on we use a positive lut_size to indicate we are - * using this hashtable, and a negative value to indicate a - * direct lookup. - */ - do { - gfp_t flags; - - /* While we can still reduce the allocation size, don't - * raise a warning and allow the allocation to fail. - * On the last pass though, we want to try as hard - * as possible to perform the allocation and warn - * if it fails. - */ - flags = GFP_KERNEL; - if (size > 1) - flags |= __GFP_NORETRY | __GFP_NOWARN; - - eb->buckets = kzalloc(sizeof(struct hlist_head) << size, - flags); - if (eb->buckets) - break; - } while (--size); - - if (unlikely(!size)) - return -ENOMEM; - - eb->lut_size = size; - } else { - eb->lut_size = -eb->buffer_count; - } - - return 0; -} - -static bool -eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, - const struct i915_vma *vma, - unsigned int flags) -{ - if (vma->node.size < entry->pad_to_size) - return true; - - if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) - return true; - - if (flags & EXEC_OBJECT_PINNED && - vma->node.start != entry->offset) - return true; - - if (flags & __EXEC_OBJECT_NEEDS_BIAS && - vma->node.start < BATCH_OFFSET_BIAS) - return true; - - if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && - (vma->node.start + vma->node.size - 1) >> 32) - return true; - - if (flags & __EXEC_OBJECT_NEEDS_MAP && - !i915_vma_is_map_and_fenceable(vma)) - return true; - - return false; -} - -static inline bool -eb_pin_vma(struct i915_execbuffer *eb, - const struct drm_i915_gem_exec_object2 *entry, - struct i915_vma *vma) -{ - unsigned int exec_flags = *vma->exec_flags; - u64 pin_flags; - - if (vma->node.size) - pin_flags = vma->node.start; - else - pin_flags = entry->offset & PIN_OFFSET_MASK; - - pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED; - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT)) - pin_flags |= PIN_GLOBAL; - - if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags))) - return false; - - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - if (unlikely(i915_vma_pin_fence(vma))) { - i915_vma_unpin(vma); - return false; - } - - if (vma->fence) - exec_flags |= __EXEC_OBJECT_HAS_FENCE; - } - - *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; - return !eb_vma_misplaced(entry, vma, exec_flags); -} - -static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags) -{ - GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN)); - - if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) - __i915_vma_unpin_fence(vma); - - __i915_vma_unpin(vma); -} - -static inline void -eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags) -{ - if (!(*flags & __EXEC_OBJECT_HAS_PIN)) - return; - -
__eb_unreserve_vma(vma, *flags); - *flags &= ~__EXEC_OBJECT_RESERVED; -} - -static int -eb_validate_vma(struct i915_execbuffer *eb, - struct drm_i915_gem_exec_object2 *entry, - struct i915_vma *vma) -{ - if (unlikely(entry->flags & eb->invalid_flags)) - return -EINVAL; - - if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) - return -EINVAL; - - /* - * Offset can be used as input (EXEC_OBJECT_PINNED), reject - * any non-page-aligned or non-canonical addresses. - */ - if (unlikely(entry->flags & EXEC_OBJECT_PINNED && - entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK))) - return -EINVAL; - - /* pad_to_size was once a reserved field, so sanitize it */ - if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { - if (unlikely(offset_in_page(entry->pad_to_size))) - return -EINVAL; - } else { - entry->pad_to_size = 0; - } - - if (unlikely(vma->exec_flags)) { - DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", - entry->handle, (int)(entry - eb->exec)); - return -EINVAL; - } - - /* - * From drm_mm perspective address space is continuous, - * so from this point we're always using non-canonical - * form internally. - */ - entry->offset = gen8_noncanonical_addr(entry->offset); - - if (!eb->reloc_cache.has_fence) { - entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; - } else { - if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || - eb->reloc_cache.needs_unfenced) && - i915_gem_object_is_tiled(vma->obj)) - entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; - } - - if (!(entry->flags & EXEC_OBJECT_PINNED)) - entry->flags |= eb->context_flags; - - return 0; -} - -static int -eb_add_vma(struct i915_execbuffer *eb, - unsigned int i, unsigned batch_idx, - struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; - int err; - - GEM_BUG_ON(i915_vma_is_closed(vma)); - - if (!(eb->args->flags & __EXEC_VALIDATED)) { - err = eb_validate_vma(eb, entry, vma); - if (unlikely(err)) - return err; - } - - if (eb->lut_size > 0) { - vma->exec_handle = entry->handle; - hlist_add_head(&vma->exec_node, - &eb->buckets[hash_32(entry->handle, - eb->lut_size)]); - } - - if (entry->relocation_count) - list_add_tail(&vma->reloc_link, &eb->relocs); - - /* - * Stash a pointer from the vma to execobj, so we can query its flags, - * size, alignment etc as provided by the user. Also we stash a pointer - * to the vma inside the execobj so that we can use a direct lookup - * to find the right target VMA when doing relocations. - */ - eb->vma[i] = vma; - eb->flags[i] = entry->flags; - vma->exec_flags = &eb->flags[i]; - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. 
- */ - if (i == batch_idx) { - if (entry->relocation_count && - !(eb->flags[i] & EXEC_OBJECT_PINNED)) - eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; - if (eb->reloc_cache.has_fence) - eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; - - eb->batch = vma; - } - - err = 0; - if (eb_pin_vma(eb, entry, vma)) { - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - } else { - eb_unreserve_vma(vma, vma->exec_flags); - - list_add_tail(&vma->exec_link, &eb->unbound); - if (drm_mm_node_allocated(&vma->node)) - err = i915_vma_unbind(vma); - if (unlikely(err)) - vma->exec_flags = NULL; - } - return err; -} - -static inline int use_cpu_reloc(const struct reloc_cache *cache, - const struct drm_i915_gem_object *obj) -{ - if (!i915_gem_object_has_struct_page(obj)) - return false; - - if (DBG_FORCE_RELOC == FORCE_CPU_RELOC) - return true; - - if (DBG_FORCE_RELOC == FORCE_GTT_RELOC) - return false; - - return (cache->has_llc || - obj->cache_dirty || - obj->cache_level != I915_CACHE_NONE); -} - -static int eb_reserve_vma(const struct i915_execbuffer *eb, - struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - unsigned int exec_flags = *vma->exec_flags; - u64 pin_flags; - int err; - - pin_flags = PIN_USER | PIN_NONBLOCK; - if (exec_flags & EXEC_OBJECT_NEEDS_GTT) - pin_flags |= PIN_GLOBAL; - - /* - * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, - * limit address to the first 4GBs for unflagged objects. - */ - if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) - pin_flags |= PIN_ZONE_4G; - - if (exec_flags & __EXEC_OBJECT_NEEDS_MAP) - pin_flags |= PIN_MAPPABLE; - - if (exec_flags & EXEC_OBJECT_PINNED) { - pin_flags |= entry->offset | PIN_OFFSET_FIXED; - pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */ - } else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) { - pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; - } - - err = i915_vma_pin(vma, - entry->pad_to_size, entry->alignment, - pin_flags); - if (err) - return err; - - if (entry->offset != vma->node.start) { - entry->offset = vma->node.start | UPDATE; - eb->args->flags |= __EXEC_HAS_RELOC; - } - - if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) { - err = i915_vma_pin_fence(vma); - if (unlikely(err)) { - i915_vma_unpin(vma); - return err; - } - - if (vma->fence) - exec_flags |= __EXEC_OBJECT_HAS_FENCE; - } - - *vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN; - GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags)); - - return 0; -} - -static int eb_reserve(struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - struct list_head last; - struct i915_vma *vma; - unsigned int i, pass; - int err; - - /* - * Attempt to pin all of the buffers into the GTT. - * This is done in 3 phases: - * - * 1a. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 1b. Increment pin count for already bound objects. - * 2. Bind new objects. - * 3. Decrement pin count. - * - * This avoids unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment.
- */ - - pass = 0; - err = 0; - do { - list_for_each_entry(vma, &eb->unbound, exec_link) { - err = eb_reserve_vma(eb, vma); - if (err) - break; - } - if (err != -ENOSPC) - return err; - - /* Resort *all* the objects into priority order */ - INIT_LIST_HEAD(&eb->unbound); - INIT_LIST_HEAD(&last); - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; - - if (flags & EXEC_OBJECT_PINNED && - flags & __EXEC_OBJECT_HAS_PIN) - continue; - - eb_unreserve_vma(vma, &eb->flags[i]); - - if (flags & EXEC_OBJECT_PINNED) - /* Pinned objects must have their slot */ - list_add(&vma->exec_link, &eb->unbound); - else if (flags & __EXEC_OBJECT_NEEDS_MAP) - /* Mappable objects require the lowest 256MiB (aperture) */ - list_add_tail(&vma->exec_link, &eb->unbound); - else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) - /* Prioritise the 4GiB region for restricted bo */ - list_add(&vma->exec_link, &last); - else - list_add_tail(&vma->exec_link, &last); - } - list_splice_tail(&last, &eb->unbound); - - switch (pass++) { - case 0: - break; - - case 1: - /* Too fragmented, unbind everything and retry */ - err = i915_gem_evict_vm(eb->vm); - if (err) - return err; - break; - - default: - return -ENOSPC; - } - } while (1); -} - -static unsigned int eb_batch_index(const struct i915_execbuffer *eb) -{ - if (eb->args->flags & I915_EXEC_BATCH_FIRST) - return 0; - else - return eb->buffer_count - 1; -} - -static int eb_select_context(struct i915_execbuffer *eb) -{ - struct i915_gem_context *ctx; - - ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); - if (unlikely(!ctx)) - return -ENOENT; - - eb->gem_context = ctx; - if (ctx->ppgtt) { - eb->vm = &ctx->ppgtt->vm; - eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; - } else { - eb->vm = &eb->i915->ggtt.vm; - } - - eb->context_flags = 0; - if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags)) - eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return 0; -} - -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request after which, once waited upon, there will be at - * least half the ring available. The hysteresis allows us to compete - * for the shared ring and should mean that we sleep less often prior - * to claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. - */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned.
- */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - -static int eb_lookup_vmas(struct i915_execbuffer *eb) -{ - struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; - struct drm_i915_gem_object *obj; - unsigned int i, batch; - int err; - - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) - return -ENOENT; - - if (unlikely(i915_gem_context_is_banned(eb->gem_context))) - return -EIO; - - INIT_LIST_HEAD(&eb->relocs); - INIT_LIST_HEAD(&eb->unbound); - - batch = eb_batch_index(eb); - - for (i = 0; i < eb->buffer_count; i++) { - u32 handle = eb->exec[i].handle; - struct i915_lut_handle *lut; - struct i915_vma *vma; - - vma = radix_tree_lookup(handles_vma, handle); - if (likely(vma)) - goto add_vma; - - obj = i915_gem_object_lookup(eb->file, handle); - if (unlikely(!obj)) { - err = -ENOENT; - goto err_vma; - } - - vma = i915_vma_instance(obj, eb->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - lut = i915_lut_handle_alloc(); - if (unlikely(!lut)) { - err = -ENOMEM; - goto err_obj; - } - - err = radix_tree_insert(handles_vma, handle, vma); - if (unlikely(err)) { - i915_lut_handle_free(lut); - goto err_obj; - } - - /* transfer ref to ctx */ - if (!vma->open_count++) - i915_vma_reopen(vma); - list_add(&lut->obj_link, &obj->lut_list); - list_add(&lut->ctx_link, &eb->gem_context->handles_list); - lut->ctx = eb->gem_context; - lut->handle = handle; - -add_vma: - err = eb_add_vma(eb, i, batch, vma); - if (unlikely(err)) - goto err_vma; - - GEM_BUG_ON(vma != eb->vma[i]); - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && - eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); - } - - eb->args->flags |= __EXEC_VALIDATED; - return eb_reserve(eb); - -err_obj: - i915_gem_object_put(obj); -err_vma: - eb->vma[i] = NULL; - return err; -} - -static struct i915_vma * -eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) -{ - if (eb->lut_size < 0) { - if (handle >= -eb->lut_size) - return NULL; - return eb->vma[handle]; - } else { - struct hlist_head *head; - struct i915_vma *vma; - - head = &eb->buckets[hash_32(handle, eb->lut_size)]; - hlist_for_each_entry(vma, head, exec_node) { - if (vma->exec_handle == handle) - return vma; - } - return NULL; - } -} - -static void eb_release_vmas(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - - for (i = 0; i < count; i++) { - struct i915_vma *vma = eb->vma[i]; - unsigned int flags = eb->flags[i]; - - if (!vma) - break; - - GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); - vma->exec_flags = NULL; - eb->vma[i] = NULL; - - if (flags & __EXEC_OBJECT_HAS_PIN) - __eb_unreserve_vma(vma, flags); - - if (flags & __EXEC_OBJECT_HAS_REF) - i915_vma_put(vma); - } -} - -static void eb_reset_vmas(const struct i915_execbuffer *eb) -{ - eb_release_vmas(eb); - if (eb->lut_size > 0) - memset(eb->buckets, 0, - sizeof(struct hlist_head) << eb->lut_size); -} - -static void eb_destroy(const struct i915_execbuffer *eb) -{ - GEM_BUG_ON(eb->reloc_cache.rq); - - if (eb->lut_size > 0) - kfree(eb->buckets); -} - -static inline u64 -relocation_target(const struct drm_i915_gem_relocation_entry *reloc, - const struct i915_vma *target) -{ - return gen8_canonical_addr((int)reloc->delta + 
target->node.start); -} - -static void reloc_cache_init(struct reloc_cache *cache, - struct drm_i915_private *i915) -{ - cache->page = -1; - cache->vaddr = 0; - /* Must be a variable in the struct to allow GCC to unroll. */ - cache->gen = INTEL_GEN(i915); - cache->has_llc = HAS_LLC(i915); - cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); - cache->has_fence = cache->gen < 4; - cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; - cache->node.allocated = false; - cache->rq = NULL; - cache->rq_size = 0; -} - -static inline void *unmask_page(unsigned long p) -{ - return (void *)(uintptr_t)(p & PAGE_MASK); -} - -static inline unsigned int unmask_flags(unsigned long p) -{ - return p & ~PAGE_MASK; -} - -#define KMAP 0x4 /* after CLFLUSH_FLAGS */ - -static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) -{ - struct drm_i915_private *i915 = - container_of(cache, struct i915_execbuffer, reloc_cache)->i915; - return &i915->ggtt; -} - -static void reloc_gpu_flush(struct reloc_cache *cache) -{ - GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - - __i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size); - i915_gem_object_unpin_map(cache->rq->batch->obj); - - i915_gem_chipset_flush(cache->rq->i915); - - i915_request_add(cache->rq); - cache->rq = NULL; -} - -static void reloc_cache_reset(struct reloc_cache *cache) -{ - void *vaddr; - - if (cache->rq) - reloc_gpu_flush(cache); - - if (!cache->vaddr) - return; - - vaddr = unmask_page(cache->vaddr); - if (cache->vaddr & KMAP) { - if (cache->vaddr & CLFLUSH_AFTER) - mb(); - - kunmap_atomic(vaddr); - i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm); - } else { - wmb(); - io_mapping_unmap_atomic((void __iomem *)vaddr); - if (cache->node.allocated) { - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - - ggtt->vm.clear_range(&ggtt->vm, - cache->node.start, - cache->node.size); - drm_mm_remove_node(&cache->node); - } else { - i915_vma_unpin((struct i915_vma *)cache->node.mm); - } - } - - cache->vaddr = 0; - cache->page = -1; -} - -static void *reloc_kmap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - void *vaddr; - - if (cache->vaddr) { - kunmap_atomic(unmask_page(cache->vaddr)); - } else { - unsigned int flushes; - int err; - - err = i915_gem_object_prepare_write(obj, &flushes); - if (err) - return ERR_PTR(err); - - BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); - BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); - - cache->vaddr = flushes | KMAP; - cache->node.mm = (void *)obj; - if (flushes) - mb(); - } - - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); - cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; - cache->page = page; - - return vaddr; -} - -static void *reloc_iomap(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - struct i915_ggtt *ggtt = cache_to_ggtt(cache); - unsigned long offset; - void *vaddr; - - if (cache->vaddr) { - io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); - } else { - struct i915_vma *vma; - int err; - - if (use_cpu_reloc(cache, obj)) - return NULL; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return ERR_PTR(err); - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | - PIN_NONBLOCK | - PIN_NONFAULT); - if (IS_ERR(vma)) { - memset(&cache->node, 0, sizeof(cache->node)); - err = drm_mm_insert_node_in_range - 
(&ggtt->vm.mm, &cache->node, - PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (err) /* no inactive aperture space, use cpu reloc */ - return NULL; - } else { - err = i915_vma_put_fence(vma); - if (err) { - i915_vma_unpin(vma); - return ERR_PTR(err); - } - - cache->node.start = vma->node.start; - cache->node.mm = (void *)vma; - } - } - - offset = cache->node.start; - if (cache->node.allocated) { - wmb(); - ggtt->vm.insert_page(&ggtt->vm, - i915_gem_object_get_dma_address(obj, page), - offset, I915_CACHE_NONE, 0); - } else { - offset += page << PAGE_SHIFT; - } - - vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, - offset); - cache->page = page; - cache->vaddr = (unsigned long)vaddr; - - return vaddr; -} - -static void *reloc_vaddr(struct drm_i915_gem_object *obj, - struct reloc_cache *cache, - unsigned long page) -{ - void *vaddr; - - if (cache->page == page) { - vaddr = unmask_page(cache->vaddr); - } else { - vaddr = NULL; - if ((cache->vaddr & KMAP) == 0) - vaddr = reloc_iomap(obj, cache, page); - if (!vaddr) - vaddr = reloc_kmap(obj, cache, page); - } - - return vaddr; -} - -static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) -{ - if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { - if (flushes & CLFLUSH_BEFORE) { - clflushopt(addr); - mb(); - } - - *addr = value; - - /* - * Writes to the same cacheline are serialised by the CPU - * (including clflush). On the write path, we only require - * that it hits memory in an orderly fashion and place - * mb barriers at the start and end of the relocation phase - * to ensure ordering of clflush wrt to the system. - */ - if (flushes & CLFLUSH_AFTER) - clflushopt(addr); - } else - *addr = value; -} - -static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - struct drm_i915_gem_object *obj; - struct i915_request *rq; - struct i915_vma *batch; - u32 *cmd; - int err; - - if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) { - obj = vma->obj; - if (obj->cache_dirty & ~obj->cache_coherent) - i915_gem_clflush_object(obj, 0); - obj->write_domain = 0; - } - - GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU); - - obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - cmd = i915_gem_object_pin_map(obj, - cache->has_llc ? - I915_MAP_FORCE_WB : - I915_MAP_FORCE_WC); - i915_gem_object_unpin_pages(obj); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_unmap; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); - if (err) - goto err_unmap; - - rq = i915_request_create(eb->context); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = i915_request_await_object(rq, vma->obj, true); - if (err) - goto err_request; - - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->gen > 5 ? 
0 : I915_DISPATCH_SECURE); - if (err) - goto err_request; - - GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true)); - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - rq->batch = batch; - i915_vma_unpin(batch); - - cache->rq = rq; - cache->rq_cmd = cmd; - cache->rq_size = 0; - - /* Return with batch mapping (cmd) still pinned */ - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(batch); -err_unmap: - i915_gem_object_unpin_map(obj); - return err; -} - -static u32 *reloc_gpu(struct i915_execbuffer *eb, - struct i915_vma *vma, - unsigned int len) -{ - struct reloc_cache *cache = &eb->reloc_cache; - u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(cache); - - if (unlikely(!cache->rq)) { - int err; - - /* If we need to copy for the cmdparser, we will stall anyway */ - if (eb_use_cmdparser(eb)) - return ERR_PTR(-EWOULDBLOCK); - - if (!intel_engine_can_store_dword(eb->engine)) - return ERR_PTR(-ENODEV); - - err = __reloc_gpu_alloc(eb, vma, len); - if (unlikely(err)) - return ERR_PTR(err); - } - - cmd = cache->rq_cmd + cache->rq_size; - cache->rq_size += len; - - return cmd; -} - -static u64 -relocate_entry(struct i915_vma *vma, - const struct drm_i915_gem_relocation_entry *reloc, - struct i915_execbuffer *eb, - const struct i915_vma *target) -{ - u64 offset = reloc->offset; - u64 target_offset = relocation_target(reloc, target); - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; - - if (!eb->reloc_cache.vaddr && - (DBG_FORCE_RELOC == FORCE_GPU_RELOC || - !reservation_object_test_signaled_rcu(vma->resv, true))) { - const unsigned int gen = eb->reloc_cache.gen; - unsigned int len; - u32 *batch; - u64 addr; - - if (wide) - len = offset & 7 ? 
8 : 5; - else if (gen >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) - goto repeat; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (wide) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_offset); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - *batch++ = upper_32_bits(target_offset); - } - } else if (gen >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else if (gen >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_offset; - } - - goto out; - } - -repeat: - vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_offset), - eb->reloc_cache.vaddr); - - if (wide) { - offset += sizeof(u32); - target_offset >>= 32; - wide = false; - goto repeat; - } - -out: - return target->node.start | UPDATE; -} - -static u64 -eb_relocate_entry(struct i915_execbuffer *eb, - struct i915_vma *vma, - const struct drm_i915_gem_relocation_entry *reloc) -{ - struct i915_vma *target; - int err; - - /* we already hold a reference to all valid objects */ - target = eb_get_vma(eb, reloc->target_handle); - if (unlikely(!target)) - return -ENOENT; - - /* Validate that the target is in a valid r/w GPU domain */ - if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { - DRM_DEBUG("reloc with multiple write domains: " - "target %d offset %d " - "read %08x write %08x", - reloc->target_handle, - (int) reloc->offset, - reloc->read_domains, - reloc->write_domain); - return -EINVAL; - } - if (unlikely((reloc->write_domain | reloc->read_domains) - & ~I915_GEM_GPU_DOMAINS)) { - DRM_DEBUG("reloc with read/write non-GPU domains: " - "target %d offset %d " - "read %08x write %08x", - reloc->target_handle, - (int) reloc->offset, - reloc->read_domains, - reloc->write_domain); - return -EINVAL; - } - - if (reloc->write_domain) { - *target->exec_flags |= EXEC_OBJECT_WRITE; - - /* - * Sandybridge PPGTT errata: We need a global gtt mapping - * for MI and pipe_control writes because the gpu doesn't - * properly redirect them through the ppgtt for non_secure - * batchbuffers. - */ - if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && - IS_GEN(eb->i915, 6)) { - err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); - if (WARN_ONCE(err, - "Unexpected failure to bind target VMA!")) - return err; - } - } - - /* - * If the relocation already has the right value in it, no - * more work needs to be done. - */ - if (!DBG_FORCE_RELOC && - gen8_canonical_addr(target->node.start) == reloc->presumed_offset) - return 0; - - /* Check that the relocation address is valid... */ - if (unlikely(reloc->offset > - vma->size - (eb->reloc_cache.use_64bit_reloc ?
8 : 4))) { - DRM_DEBUG("Relocation beyond object bounds: " - "target %d offset %d size %d.\n", - reloc->target_handle, - (int)reloc->offset, - (int)vma->size); - return -EINVAL; - } - if (unlikely(reloc->offset & 3)) { - DRM_DEBUG("Relocation not 4-byte aligned: " - "target %d offset %d.\n", - reloc->target_handle, - (int)reloc->offset); - return -EINVAL; - } - - /* - * If we write into the object, we need to force the synchronisation - * barrier, either with an asynchronous clflush or if we executed the - * patching using the GPU (though that should be serialised by the - * timeline). To be completely sure, and since we are required to - * do relocations we are already stalling, disable the user's opt - * out of our synchronisation. - */ - *vma->exec_flags &= ~EXEC_OBJECT_ASYNC; - - /* and update the user's relocation entry */ - return relocate_entry(vma, reloc, eb, target); -} - -static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) -{ -#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) - struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; - struct drm_i915_gem_relocation_entry __user *urelocs; - const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - unsigned int remain; - - urelocs = u64_to_user_ptr(entry->relocs_ptr); - remain = entry->relocation_count; - if (unlikely(remain > N_RELOC(ULONG_MAX))) - return -EINVAL; - - /* - * We must check that the entire relocation array is safe - * to read. However, if the array is not writable the user loses - * the updated relocation values. - */ - if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs)))) - return -EFAULT; - - do { - struct drm_i915_gem_relocation_entry *r = stack; - unsigned int count = - min_t(unsigned int, remain, ARRAY_SIZE(stack)); - unsigned int copied; - - /* - * This is the fast path and we cannot handle a pagefault - * whilst holding the struct mutex lest the user pass in the - * relocations contained within a mmaped bo, for in such a case - * the page fault handler would call i915_gem_fault() and we - * would try to acquire the struct mutex again. Obviously - * this is bad and so lockdep complains vehemently. - */ - pagefault_disable(); - copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); - pagefault_enable(); - if (unlikely(copied)) { - remain = -EFAULT; - goto out; - } - - remain -= count; - do { - u64 offset = eb_relocate_entry(eb, vma, r); - - if (likely(offset == 0)) { - } else if ((s64)offset < 0) { - remain = (int)offset; - goto out; - } else { - /* - * Note that reporting an error now - * leaves everything in an inconsistent - * state as we have *already* changed - * the relocation value inside the - * object. As we have not changed the - * reloc.presumed_offset, and will not - * change the execobject.offset, a - * subsequent call may not rewrite the - * value inside the object, leaving it - * dangling and causing a GPU hang, - * unless userspace dynamically rebuilds - * the relocations on each execbuf rather - * than presuming a static tree. - * - * We did previously check that the - * relocations were writable (access_ok); - * an error now would be a strange race - * with mprotect, since we have already - * demonstrated that we can read from - * this userspace address.
- */ - offset = gen8_canonical_addr(offset & ~UPDATE); - if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) { - remain = -EFAULT; - goto out; - } - } - } while (r++, --count); - urelocs += ARRAY_SIZE(stack); - } while (remain); -out: - reloc_cache_reset(&eb->reloc_cache); - return remain; -} - -static int -eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) -{ - const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma); - struct drm_i915_gem_relocation_entry *relocs = - u64_to_ptr(typeof(*relocs), entry->relocs_ptr); - unsigned int i; - int err; - - for (i = 0; i < entry->relocation_count; i++) { - u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); - - if ((s64)offset < 0) { - err = (int)offset; - goto err; - } - } - err = 0; -err: - reloc_cache_reset(&eb->reloc_cache); - return err; -} - -static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) -{ - const char __user *addr, *end; - unsigned long size; - char __maybe_unused c; - - size = entry->relocation_count; - if (size == 0) - return 0; - - if (size > N_RELOC(ULONG_MAX)) - return -EINVAL; - - addr = u64_to_user_ptr(entry->relocs_ptr); - size *= sizeof(struct drm_i915_gem_relocation_entry); - if (!access_ok(addr, size)) - return -EFAULT; - - end = addr + size; - for (; addr < end; addr += PAGE_SIZE) { - int err = __get_user(c, addr); - if (err) - return err; - } - return __get_user(c, end - 1); -} - -static int eb_copy_relocations(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - int err; - - for (i = 0; i < count; i++) { - const unsigned int nreloc = eb->exec[i].relocation_count; - struct drm_i915_gem_relocation_entry __user *urelocs; - struct drm_i915_gem_relocation_entry *relocs; - unsigned long size; - unsigned long copied; - - if (nreloc == 0) - continue; - - err = check_relocations(&eb->exec[i]); - if (err) - goto err; - - urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); - size = nreloc * sizeof(*relocs); - - relocs = kvmalloc_array(size, 1, GFP_KERNEL); - if (!relocs) { - err = -ENOMEM; - goto err; - } - - /* copy_from_user is limited to < 4GiB */ - copied = 0; - do { - unsigned int len = - min_t(u64, BIT_ULL(31), size - copied); - - if (__copy_from_user((char *)relocs + copied, - (char __user *)urelocs + copied, - len)) { -end_user: - user_access_end(); -end: - kvfree(relocs); - err = -EFAULT; - goto err; - } - - copied += len; - } while (copied < size); - - /* - * As we do not update the known relocation offsets after - * relocating (due to the complexities in lock handling), - * we need to mark them as invalid now so that we force the - * relocation processing next time. Just in case the target - * object is evicted and then rebound into its old - * presumed_offset before the next execbuffer - if that - * happened we would make the mistake of assuming that the - * relocations were valid. 
- */ - if (!user_access_begin(urelocs, size)) - goto end; - - for (copied = 0; copied < nreloc; copied++) - unsafe_put_user(-1, - &urelocs[copied].presumed_offset, - end_user); - user_access_end(); - - eb->exec[i].relocs_ptr = (uintptr_t)relocs; - } - - return 0; - -err: - while (i--) { - struct drm_i915_gem_relocation_entry *relocs = - u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); - if (eb->exec[i].relocation_count) - kvfree(relocs); - } - return err; -} - -static int eb_prefault_relocations(const struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - - if (unlikely(i915_modparams.prefault_disable)) - return 0; - - for (i = 0; i < count; i++) { - int err; - - err = check_relocations(&eb->exec[i]); - if (err) - return err; - } - - return 0; -} - -static noinline int eb_relocate_slow(struct i915_execbuffer *eb) -{ - struct drm_device *dev = &eb->i915->drm; - bool have_copy = false; - struct i915_vma *vma; - int err = 0; - -repeat: - if (signal_pending(current)) { - err = -ERESTARTSYS; - goto out; - } - - /* We may process another execbuffer during the unlock... */ - eb_reset_vmas(eb); - mutex_unlock(&dev->struct_mutex); - - /* - * We take 3 passes through the slowpath. - * - * 1 - we try to just prefault all the user relocation entries and - * then attempt to reuse the atomic pagefault disabled fast path again. - * - * 2 - we copy the user entries to a local buffer here outside of the - * lock and allow ourselves to wait upon any rendering before - * relocations - * - * 3 - we already have a local copy of the relocation entries, but - * were interrupted (EAGAIN) whilst waiting for the objects, try again. - */ - if (!err) { - err = eb_prefault_relocations(eb); - } else if (!have_copy) { - err = eb_copy_relocations(eb); - have_copy = err == 0; - } else { - cond_resched(); - err = 0; - } - if (err) { - mutex_lock(&dev->struct_mutex); - goto out; - } - - /* A frequent cause of EAGAIN is currently unavailable client pages */ - flush_workqueue(eb->i915->mm.userptr_wq); - - err = i915_mutex_lock_interruptible(dev); - if (err) { - mutex_lock(&dev->struct_mutex); - goto out; - } - - /* reacquire the objects */ - err = eb_lookup_vmas(eb); - if (err) - goto err; - - GEM_BUG_ON(!eb->batch); - - list_for_each_entry(vma, &eb->relocs, reloc_link) { - if (!have_copy) { - pagefault_disable(); - err = eb_relocate_vma(eb, vma); - pagefault_enable(); - if (err) - goto repeat; - } else { - err = eb_relocate_vma_slow(eb, vma); - if (err) - goto err; - } - } - - /* - * Leave the user relocations as they are, this is the painfully slow - * path, and we want to avoid the complication of dropping the lock - * whilst having buffers reserved in the aperture and so causing - * spurious ENOSPC for random operations. - */ - -err: - if (err == -EAGAIN) - goto repeat; - -out: - if (have_copy) { - const unsigned int count = eb->buffer_count; - unsigned int i; - - for (i = 0; i < count; i++) { - const struct drm_i915_gem_exec_object2 *entry = - &eb->exec[i]; - struct drm_i915_gem_relocation_entry *relocs; - - if (!entry->relocation_count) - continue; - - relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); - kvfree(relocs); - } - } - - return err; -} - -static int eb_relocate(struct i915_execbuffer *eb) -{ - if (eb_lookup_vmas(eb)) - goto slow; - - /* The objects are in their final locations, apply the relocations.
*/ - if (eb->args->flags & __EXEC_HAS_RELOC) { - struct i915_vma *vma; - - list_for_each_entry(vma, &eb->relocs, reloc_link) { - if (eb_relocate_vma(eb, vma)) - goto slow; - } - } - - return 0; - -slow: - return eb_relocate_slow(eb); -} - -static int eb_move_to_gpu(struct i915_execbuffer *eb) -{ - const unsigned int count = eb->buffer_count; - unsigned int i; - int err; - - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; - struct drm_i915_gem_object *obj = vma->obj; - - if (flags & EXEC_OBJECT_CAPTURE) { - struct i915_capture_list *capture; - - capture = kmalloc(sizeof(*capture), GFP_KERNEL); - if (unlikely(!capture)) - return -ENOMEM; - - capture->next = eb->request->capture_list; - capture->vma = eb->vma[i]; - eb->request->capture_list = capture; - } - - /* - * If the GPU is not _reading_ through the CPU cache, we need - * to make sure that any writes (both previous GPU writes from - * before a change in snooping levels and normal CPU writes) - * caught in that cache are flushed to main memory. - * - * We want to say - * obj->cache_dirty && - * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ) - * but gcc's optimiser doesn't handle that as well and emits - * two jumps instead of one. Maybe one day... - */ - if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) { - if (i915_gem_clflush_object(obj, 0)) - flags &= ~EXEC_OBJECT_ASYNC; - } - - if (flags & EXEC_OBJECT_ASYNC) - continue; - - err = i915_request_await_object - (eb->request, obj, flags & EXEC_OBJECT_WRITE); - if (err) - return err; - } - - for (i = 0; i < count; i++) { - unsigned int flags = eb->flags[i]; - struct i915_vma *vma = eb->vma[i]; - - err = i915_vma_move_to_active(vma, eb->request, flags); - if (unlikely(err)) { - i915_request_skip(eb->request, err); - return err; - } - - __eb_unreserve_vma(vma, flags); - vma->exec_flags = NULL; - - if (unlikely(flags & __EXEC_OBJECT_HAS_REF)) - i915_vma_put(vma); - } - eb->exec = NULL; - - /* Unconditionally flush any chipset caches (for streaming writes). 
*/ - i915_gem_chipset_flush(eb->i915); - - return 0; -} - -static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) -{ - if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) - return false; - - /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { - if (exec->num_cliprects || exec->cliprects_ptr) - return false; - } - - if (exec->DR4 == 0xffffffff) { - DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); - exec->DR4 = 0; - } - if (exec->DR1 || exec->DR4) - return false; - - if ((exec->batch_start_offset | exec->batch_len) & 0x7) - return false; - - return true; -} - -static int i915_reset_gen7_sol_offsets(struct i915_request *rq) -{ - u32 *cs; - int i; - - if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) { - DRM_DEBUG("sol reset is gen7/rcs only\n"); - return -EINVAL; - } - - cs = intel_ring_begin(rq, 4 * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(4); - for (i = 0; i < 4; i++) { - *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); - *cs++ = 0; - } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) -{ - struct drm_i915_gem_object *shadow_batch_obj; - struct i915_vma *vma; - int err; - - shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, - PAGE_ALIGN(eb->batch_len)); - if (IS_ERR(shadow_batch_obj)) - return ERR_CAST(shadow_batch_obj); - - err = intel_engine_cmd_parser(eb->engine, - eb->batch->obj, - shadow_batch_obj, - eb->batch_start_offset, - eb->batch_len, - is_master); - if (err) { - if (err == -EACCES) /* unhandled chained batch */ - vma = NULL; - else - vma = ERR_PTR(err); - goto out; - } - - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto out; - - eb->vma[eb->buffer_count] = i915_vma_get(vma); - eb->flags[eb->buffer_count] = - __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; - vma->exec_flags = &eb->flags[eb->buffer_count]; - eb->buffer_count++; - -out: - i915_gem_object_unpin_pages(shadow_batch_obj); - return vma; -} - -static void -add_to_client(struct i915_request *rq, struct drm_file *file) -{ - rq->file_priv = file->driver_priv; - list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); -} - -static int eb_submit(struct i915_execbuffer *eb) -{ - int err; - - err = eb_move_to_gpu(eb); - if (err) - return err; - - if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { - err = i915_reset_gen7_sol_offsets(eb->request); - if (err) - return err; - } - - /* - * After we completed waiting for other engines (using HW semaphores) - * then we can signal that this request/batch is ready to run. This - * allows us to determine if the batch is still waiting on the GPU - * or actually running by checking the breadcrumb. - */ - if (eb->engine->emit_init_breadcrumb) { - err = eb->engine->emit_init_breadcrumb(eb->request); - if (err) - return err; - } - - err = eb->engine->emit_bb_start(eb->request, - eb->batch->node.start + - eb->batch_start_offset, - eb->batch_len, - eb->batch_flags); - if (err) - return err; - - return 0; -} - -/* - * Find one BSD ring to dispatch the corresponding BSD command. - * The engine index is returned. - */ -static unsigned int -gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv = file->driver_priv; - - /* Check whether the file_priv has already selected one ring. 
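-	 * If not, flip a global toggle so that successive clients alternate
-	 * between the two video decode engines; roughly (an illustrative
-	 * sketch, not the exact atomic used below):
-	 *
-	 *	file_priv->bsd_engine = dispatch_index;
-	 *	dispatch_index ^= 1;	(the next client gets the other VCS)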
*/ - if ((int)file_priv->bsd_engine < 0) - file_priv->bsd_engine = atomic_fetch_xor(1, - &dev_priv->mm.bsd_engine_dispatch_index); - - return file_priv->bsd_engine; -} - -static const enum intel_engine_id user_ring_map[] = { - [I915_EXEC_DEFAULT] = RCS0, - [I915_EXEC_RENDER] = RCS0, - [I915_EXEC_BLT] = BCS0, - [I915_EXEC_BSD] = VCS0, - [I915_EXEC_VEBOX] = VECS0 -}; - -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - err = i915_terminally_wedged(eb->i915); - if (err) - return err; - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - err = intel_context_pin(ce); - if (err) - return err; - - eb->engine = ce->engine; - eb->context = ce; - return 0; -} - -static void eb_unpin_context(struct i915_execbuffer *eb) -{ - intel_context_unpin(eb->context); -} - -static unsigned int -eb_select_legacy_ring(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) -{ - struct drm_i915_private *i915 = eb->i915; - unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; - - if (user_ring_id != I915_EXEC_BSD && - (args->flags & I915_EXEC_BSD_MASK)) { - DRM_DEBUG("execbuf with non bsd ring but with invalid " - "bsd dispatch flags: %d\n", (int)(args->flags)); - return -1; - } - - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { - unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; - - if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(i915, file); - } else if (bsd_idx >= I915_EXEC_BSD_RING1 && - bsd_idx <= I915_EXEC_BSD_RING2) { - bsd_idx >>= I915_EXEC_BSD_SHIFT; - bsd_idx--; - } else { - DRM_DEBUG("execbuf with unknown bsd ring: %u\n", - bsd_idx); - return -1; - } - - return _VCS(bsd_idx); - } - - if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -1; - } - - return user_ring_map[user_ring_id]; -} - -static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) -{ - struct intel_context *ce; - unsigned int idx; - int err; - - if (i915_gem_context_user_engines(eb->gem_context)) - idx = args->flags & I915_EXEC_RING_MASK; - else - idx = eb_select_legacy_ring(eb, file, args); - - ce = i915_gem_context_get_engine(eb->gem_context, idx); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - err = eb_pin_context(eb, ce); - intel_context_put(ce); - - return err; -} - -static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) -{ - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); - kvfree(fences); -} - -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) -{ - const unsigned long nfences = args->num_cliprects; - struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; - unsigned long n; - int err; - - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; - - /* Check multiplication overflow for access_ok() and kvmalloc_array() */ - BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); - if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) - return ERR_PTR(-EINVAL); - - user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) - return ERR_PTR(-EFAULT); - - fences 
= kvmalloc_array(nfences, sizeof(*fences), - __GFP_NOWARN | GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; - struct drm_syncobj *syncobj; - - if (__copy_from_user(&fence, user++, sizeof(fence))) { - err = -EFAULT; - goto err; - } - - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { - err = -EINVAL; - goto err; - } - - syncobj = drm_syncobj_find(file, fence.handle); - if (!syncobj) { - DRM_DEBUG("Invalid syncobj handle provided\n"); - err = -ENOENT; - goto err; - } - - BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & - ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); - } - - return fences; - -err: - __free_fence_array(fences, n); - return ERR_PTR(err); -} - -static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) -{ - if (fences) - __free_fence_array(fences, args->num_cliprects); -} - -static int -await_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) -{ - const unsigned int nfences = eb->args->num_cliprects; - unsigned int n; - int err; - - for (n = 0; n < nfences; n++) { - struct drm_syncobj *syncobj; - struct dma_fence *fence; - unsigned int flags; - - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_WAIT)) - continue; - - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; - - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); - if (err < 0) - return err; - } - - return 0; -} - -static void -signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) -{ - const unsigned int nfences = eb->args->num_cliprects; - struct dma_fence * const fence = &eb->request->fence; - unsigned int n; - - for (n = 0; n < nfences; n++) { - struct drm_syncobj *syncobj; - unsigned int flags; - - syncobj = ptr_unpack_bits(fences[n], &flags, 2); - if (!(flags & I915_EXEC_FENCE_SIGNAL)) - continue; - - drm_syncobj_replace_fence(syncobj, fence); - } -} - -static int -i915_gem_do_execbuffer(struct drm_device *dev, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) -{ - struct i915_execbuffer eb; - struct dma_fence *in_fence = NULL; - struct dma_fence *exec_fence = NULL; - struct sync_file *out_fence = NULL; - int out_fence_fd = -1; - int err; - - BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); - BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & - ~__EXEC_OBJECT_UNKNOWN_FLAGS); - - eb.i915 = to_i915(dev); - eb.file = file; - eb.args = args; - if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) - args->flags |= __EXEC_HAS_RELOC; - - eb.exec = exec; - eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1); - eb.vma[0] = NULL; - eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1); - - eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; - reloc_cache_init(&eb.reloc_cache, eb.i915); - - eb.buffer_count = args->buffer_count; - eb.batch_start_offset = args->batch_start_offset; - eb.batch_len = args->batch_len; - - eb.batch_flags = 0; - if (args->flags & I915_EXEC_SECURE) { - if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; - - eb.batch_flags |= I915_DISPATCH_SECURE; - } - if (args->flags & I915_EXEC_IS_PINNED) - eb.batch_flags |= I915_DISPATCH_PINNED; - - if (args->flags & I915_EXEC_FENCE_IN) { - in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; - } 
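-
-	/*
-	 * Note that FENCE_IN and FENCE_SUBMIT both take their fd from the
-	 * low 32 bits of rsvd2, which is why the two flags are rejected as
-	 * a pair below, while FENCE_OUT hands an fd back in the high
-	 * 32 bits.
-	 */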
- - if (args->flags & I915_EXEC_FENCE_SUBMIT) { - if (in_fence) { - err = -EINVAL; - goto err_in_fence; - } - - exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!exec_fence) { - err = -EINVAL; - goto err_in_fence; - } - } - - if (args->flags & I915_EXEC_FENCE_OUT) { - out_fence_fd = get_unused_fd_flags(O_CLOEXEC); - if (out_fence_fd < 0) { - err = out_fence_fd; - goto err_exec_fence; - } - } - - err = eb_create(&eb); - if (err) - goto err_out_fence; - - GEM_BUG_ON(!eb.lut_size); - - err = eb_select_context(&eb); - if (unlikely(err)) - goto err_destroy; - - /* - * Take a local wakeref for preparing to dispatch the execbuf as - * we expect to access the hardware fairly frequently in the - * process. Upon first dispatch, we acquire another prolonged - * wakeref that we hold until the GPU has been idle for at least - * 100ms. - */ - intel_gt_pm_get(eb.i915); - - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_rpm; - - err = eb_select_engine(&eb, file, args); - if (unlikely(err)) - goto err_unlock; - - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) - goto err_engine; - - err = eb_relocate(&eb); - if (err) { - /* - * If the user expects the execobject.offset and - * reloc.presumed_offset to be an exact match, - * as for using NO_RELOC, then we cannot update - * the execobject.offset until we have completed - * relocation. - */ - args->flags &= ~__EXEC_HAS_RELOC; - goto err_vma; - } - - if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) { - DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); - err = -EINVAL; - goto err_vma; - } - if (eb.batch_start_offset > eb.batch->size || - eb.batch_len > eb.batch->size - eb.batch_start_offset) { - DRM_DEBUG("Attempting to use out-of-bounds batch\n"); - err = -EINVAL; - goto err_vma; - } - - if (eb_use_cmdparser(&eb)) { - struct i915_vma *vma; - - vma = eb_parse(&eb, drm_is_current_master(file)); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } - } - - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - - /* - * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure - * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but bdw mucks it up again. */ - if (eb.batch_flags & I915_DISPATCH_SECURE) { - struct i915_vma *vma; - - /* - * So on first glance it looks freaky that we pin the batch here - * outside of the reservation loop. But: - * - The batch is already pinned into the relevant ppgtt, so we - * already have the backing storage fully allocated. - * - No other BO uses the global gtt (well contexts, but meh), - * so we don't really have issues with multiple objects not - * fitting due to fragmentation. - * So this is actually safe. - */ - vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_vma; - } - - eb.batch = vma; - } - - /* All GPU relocation batches must be submitted prior to the user rq */ - GEM_BUG_ON(eb.reloc_cache.rq); - - /* Allocate a request for this batch buffer nice and early. 
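-	 * (i.e. before wiring up the in/submit/array fences below, which
-	 * all need a request to attach their waits to.)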
*/
-	eb.request = i915_request_create(eb.context);
-	if (IS_ERR(eb.request)) {
-		err = PTR_ERR(eb.request);
-		goto err_batch_unpin;
-	}
-
-	if (in_fence) {
-		err = i915_request_await_dma_fence(eb.request, in_fence);
-		if (err < 0)
-			goto err_request;
-	}
-
-	if (exec_fence) {
-		err = i915_request_await_execution(eb.request, exec_fence,
-						   eb.engine->bond_execute);
-		if (err < 0)
-			goto err_request;
-	}
-
-	if (fences) {
-		err = await_fence_array(&eb, fences);
-		if (err)
-			goto err_request;
-	}
-
-	if (out_fence_fd != -1) {
-		out_fence = sync_file_create(&eb.request->fence);
-		if (!out_fence) {
-			err = -ENOMEM;
-			goto err_request;
-		}
-	}
-
-	/*
-	 * Whilst this request exists, batch_obj will be on the
-	 * active_list, and so will hold the active reference. Only when this
-	 * request is retired will the batch_obj be moved onto the
-	 * inactive_list and lose its active reference. Hence we do not need
-	 * to explicitly hold another reference here.
-	 */
-	eb.request->batch = eb.batch;
-
-	trace_i915_request_queue(eb.request, eb.batch_flags);
-	err = eb_submit(&eb);
-err_request:
-	add_to_client(eb.request, file);
-	i915_request_add(eb.request);
-
-	if (fences)
-		signal_fence_array(&eb, fences);
-
-	if (out_fence) {
-		if (err == 0) {
-			fd_install(out_fence_fd, out_fence->file);
-			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
-			args->rsvd2 |= (u64)out_fence_fd << 32;
-			out_fence_fd = -1;
-		} else {
-			fput(out_fence->file);
-		}
-	}
-
-err_batch_unpin:
-	if (eb.batch_flags & I915_DISPATCH_SECURE)
-		i915_vma_unpin(eb.batch);
-err_vma:
-	if (eb.exec)
-		eb_release_vmas(&eb);
-err_engine:
-	eb_unpin_context(&eb);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-err_rpm:
-	intel_gt_pm_put(eb.i915);
-	i915_gem_context_put(eb.gem_context);
-err_destroy:
-	eb_destroy(&eb);
-err_out_fence:
-	if (out_fence_fd != -1)
-		put_unused_fd(out_fence_fd);
-err_exec_fence:
-	dma_fence_put(exec_fence);
-err_in_fence:
-	dma_fence_put(in_fence);
-	return err;
-}
-
-static size_t eb_element_size(void)
-{
-	return (sizeof(struct drm_i915_gem_exec_object2) +
-		sizeof(struct i915_vma *) +
-		sizeof(unsigned int));
-}
-
-static bool check_buffer_count(size_t count)
-{
-	const size_t sz = eb_element_size();
-
-	/*
-	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
-	 * array size (see eb_create()). Otherwise, we can accept an array as
-	 * large as can be addressed (though use large arrays at your peril)!
-	 */
-
-	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
-}
-
-/*
- * Legacy execbuffer just creates an exec2 list from the original exec object
- * list array and passes it to the real function.
- */ -int -i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_execbuffer2 exec2; - struct drm_i915_gem_exec_object *exec_list = NULL; - struct drm_i915_gem_exec_object2 *exec2_list = NULL; - const size_t count = args->buffer_count; - unsigned int i; - int err; - - if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - exec2.buffers_ptr = args->buffers_ptr; - exec2.buffer_count = args->buffer_count; - exec2.batch_start_offset = args->batch_start_offset; - exec2.batch_len = args->batch_len; - exec2.DR1 = args->DR1; - exec2.DR4 = args->DR4; - exec2.num_cliprects = args->num_cliprects; - exec2.cliprects_ptr = args->cliprects_ptr; - exec2.flags = I915_EXEC_RENDER; - i915_execbuffer2_set_context_id(exec2, 0); - - if (!i915_gem_check_execbuffer(&exec2)) - return -EINVAL; - - /* Copy in the exec list from userland */ - exec_list = kvmalloc_array(count, sizeof(*exec_list), - __GFP_NOWARN | GFP_KERNEL); - exec2_list = kvmalloc_array(count + 1, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec_list == NULL || exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %d buffers\n", - args->buffer_count); - kvfree(exec_list); - kvfree(exec2_list); - return -ENOMEM; - } - err = copy_from_user(exec_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec_list) * count); - if (err) { - DRM_DEBUG("copy %d exec entries failed %d\n", - args->buffer_count, err); - kvfree(exec_list); - kvfree(exec2_list); - return -EFAULT; - } - - for (i = 0; i < args->buffer_count; i++) { - exec2_list[i].handle = exec_list[i].handle; - exec2_list[i].relocation_count = exec_list[i].relocation_count; - exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; - exec2_list[i].alignment = exec_list[i].alignment; - exec2_list[i].offset = exec_list[i].offset; - if (INTEL_GEN(to_i915(dev)) < 4) - exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; - else - exec2_list[i].flags = 0; - } - - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); - if (exec2.flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - - /* Copy the new buffer offsets back to the user's exec list. 
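-		 * Only entries whose offset was actually updated (those still
-		 * flagged with the UPDATE bit) are written back, in canonical
-		 * address form.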
*/ - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - exec2_list[i].offset &= PIN_OFFSET_MASK; - if (__copy_to_user(&user_exec_list[i].offset, - &exec2_list[i].offset, - sizeof(user_exec_list[i].offset))) - break; - } - } - - kvfree(exec_list); - kvfree(exec2_list); - return err; -} - -int -i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_execbuffer2 *args = data; - struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; - const size_t count = args->buffer_count; - int err; - - if (!check_buffer_count(count)) { - DRM_DEBUG("execbuf2 with %zd buffers\n", count); - return -EINVAL; - } - - if (!i915_gem_check_execbuffer(args)) - return -EINVAL; - - /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(count + 1, eb_element_size(), - __GFP_NOWARN | GFP_KERNEL); - if (exec2_list == NULL) { - DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", - count); - return -ENOMEM; - } - if (copy_from_user(exec2_list, - u64_to_user_ptr(args->buffers_ptr), - sizeof(*exec2_list) * count)) { - DRM_DEBUG("copy %zd exec entries failed\n", count); - kvfree(exec2_list); - return -EFAULT; - } - - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); - - /* - * Now that we have begun execution of the batchbuffer, we ignore - * any new error after this point. Also given that we have already - * updated the associated relocations, we try to write out the current - * object locations irrespective of any error. - */ - if (args->flags & __EXEC_HAS_RELOC) { - struct drm_i915_gem_exec_object2 __user *user_exec_list = - u64_to_user_ptr(args->buffers_ptr); - unsigned int i; - - /* Copy the new buffer offsets back to the user's exec list. */ - /* - * Note: count * sizeof(*user_exec_list) does not overflow, - * because we checked 'count' in check_buffer_count(). - * - * And this range already got effectively checked earlier - * when we did the "copy_from_user()" above. 
- */ - if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list))) - goto end; - - for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) - continue; - - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - unsafe_put_user(exec2_list[i].offset, - &user_exec_list[i].offset, - end_user); - } -end_user: - user_access_end(); -end:; - } - - args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); - kvfree(exec2_list); - return err; -} diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c deleted file mode 100644 index 21662176819f..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright © 2014-2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include "i915_drv.h" - -#define QUIET (__GFP_NORETRY | __GFP_NOWARN) -#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) - -static void internal_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - unsigned int npages; - int max_order; - gfp_t gfp; - - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif - - gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; - if (IS_I965GM(i915) || IS_I965G(i915)) { - /* 965gm cannot relocate objects above 4GiB. */ - gfp &= ~__GFP_HIGHMEM; - gfp |= __GFP_DMA32; - } - -create_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return -ENOMEM; - - npages = obj->base.size / PAGE_SIZE; - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - do { - int order = min(fls(npages) - 1, max_order); - struct page *page; - - do { - page = alloc_pages(gfp | (order ? 
QUIET : MAYFAIL), - order); - if (page) - break; - if (!order--) - goto err; - - /* Limit subsequent allocations as well */ - max_order = order; - } while (1); - - sg_set_page(sg, page, PAGE_SIZE << order, 0); - sg_page_sizes |= PAGE_SIZE << order; - st->nents++; - - npages -= 1 << order; - if (!npages) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while (1); - - if (i915_gem_gtt_prepare_pages(obj, st)) { - /* Failed to dma-map try again with single page sg segments */ - if (get_order(st->sgl->length)) { - internal_free_pages(st); - max_order = 0; - goto create_st; - } - goto err; - } - - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - internal_free_pages(st); - - return -ENOMEM; -} - -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - internal_free_pages(pages); - - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = i915_gem_object_get_pages_internal, - .put_pages = i915_gem_object_put_pages_internal, -}; - -/** - * i915_gem_object_create_internal: create an object with volatile pages - * @i915: the i915 device - * @size: the size in bytes of backing storage to allocate for the object - * - * Creates a new object that wraps some internal memory for private use. - * This object is not backed by swappable storage, and as such its contents - * are volatile and only valid whilst pinned. If the object is reaped by the - * shrinker, its pages and data will be discarded. Equally, it is not a full - * GEM object and so not valid for access from userspace. This makes it useful - * for hardware interfaces like ringbuffers (which are pinned from the time - * the request is written to the time the hardware stops accessing it), but - * not for contexts (which need to be preserved when not active for later - * reuse). Note that it is not cleared upon allocation. - */ -struct drm_i915_gem_object * -i915_gem_object_create_internal(struct drm_i915_private *i915, - phys_addr_t size) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE)); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &i915_gem_object_internal_ops); - - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - - cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	return obj;
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
deleted file mode 100644
index c0ad19605297..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_pm.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#include "gt/intel_gt_pm.h"
-
-#include "i915_drv.h"
-#include "i915_gem_pm.h"
-#include "i915_globals.h"
-
-static void i915_gem_park(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id)
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-
-	i915_timelines_park(i915);
-	i915_vma_parked(i915);
-
-	i915_globals_park();
-}
-
-static void idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.idle_work);
-	bool restart = true;
-
-	cancel_delayed_work(&i915->gem.retire_work);
-	mutex_lock(&i915->drm.struct_mutex);
-
-	intel_wakeref_lock(&i915->gt.wakeref);
-	if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) {
-		i915_gem_park(i915);
-		restart = false;
-	}
-	intel_wakeref_unlock(&i915->gt.wakeref);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (restart)
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-}
-
-static void retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gem.retire_work.work);
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&i915->drm.struct_mutex)) {
-		i915_retire_requests(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
-
-	queue_delayed_work(i915->wq,
-			   &i915->gem.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
-static int pm_notifier(struct notifier_block *nb,
-		       unsigned long action,
-		       void *data)
-{
-	struct drm_i915_private *i915 =
-		container_of(nb, typeof(*i915), gem.pm_notifier);
-
-	switch (action) {
-	case INTEL_GT_UNPARK:
-		i915_globals_unpark();
-		queue_delayed_work(i915->wq,
-				   &i915->gem.retire_work,
-				   round_jiffies_up_relative(HZ));
-		break;
-
-	case INTEL_GT_PARK:
-		queue_work(i915->wq, &i915->gem.idle_work);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
-{
-	bool result = true;
-
-	do {
-		if (i915_gem_wait_for_idle(i915,
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_FOR_IDLE_BOOST,
-					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-			/* XXX hide warning from gem_eio */
-			if (i915_modparams.reset) {
-				dev_err(i915->drm.dev,
-					"Failed to idle engines, declaring wedged!\n");
-				GEM_TRACE_DUMP();
-			}
-
-			/*
-			 * Forcibly cancel outstanding work and leave
-			 * the gpu quiet.
-			 */
-			i915_gem_set_wedged(i915);
-			result = false;
-		}
-	} while (i915_retire_requests(i915) && result);
-
-	GEM_BUG_ON(i915->gt.awake);
-	return result;
-}
-
-bool i915_gem_load_power_context(struct drm_i915_private *i915)
-{
-	return switch_to_kernel_context_sync(i915);
-}
-
-void i915_gem_suspend(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	intel_wakeref_auto(&i915->mm.userfault_wakeref, 0);
-	flush_workqueue(i915->wq);
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	/*
-	 * We have to flush all the executing contexts to main memory so
-	 * that they can be saved in the hibernation image. To ensure the
-	 * last context image is coherent, we have to switch away from it.
-	 * That leaves the i915->kernel_context still active when
-	 * we actually suspend, and its image in memory may not match the GPU
-	 * state. Fortunately, the kernel_context is disposable and we do
-	 * not rely on its state.
-	 */
-	switch_to_kernel_context_sync(i915);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	/*
-	 * Assert that we successfully flushed all the work and
-	 * reset the GPU back to its idle, low power state.
-	 */
-	GEM_BUG_ON(i915->gt.awake);
-	flush_work(&i915->gem.idle_work);
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	i915_gem_drain_freed_objects(i915);
-
-	intel_uc_suspend(i915);
-}
-
-void i915_gem_suspend_late(struct drm_i915_private *i915)
-{
-	struct drm_i915_gem_object *obj;
-	struct list_head *phases[] = {
-		&i915->mm.unbound_list,
-		&i915->mm.bound_list,
-		NULL
-	}, **phase;
-
-	/*
-	 * Neither the BIOS, ourselves nor any other kernel
-	 * expects the system to be in execlists mode on startup,
-	 * so we need to reset the GPU back to legacy mode. And the only
-	 * known way to disable logical contexts is through a GPU reset.
-	 *
-	 * So in order to leave the system in a known default configuration,
-	 * always reset the GPU upon unload and suspend. Afterwards we then
-	 * clean up the GEM state tracking, flushing off the requests and
-	 * leaving the system in a known idle state.
-	 *
-	 * Note that it is of the utmost importance that the GPU is idle and
-	 * all stray writes are flushed *before* we dismantle the backing
-	 * storage for the pinned objects.
-	 *
-	 * However, since we are uncertain that resetting the GPU on older
-	 * machines is a good idea, we don't - just in case it leaves the
-	 * machine in an unusable condition.
-	 */
-
-	mutex_lock(&i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
-			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	intel_uc_sanitize(i915);
-	i915_gem_sanitize(i915);
-}
-
-void i915_gem_resume(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	WARN_ON(i915->gt.awake);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	i915_gem_restore_gtt_mappings(i915);
-	i915_gem_restore_fences(i915);
-
-	/*
-	 * As we didn't flush the kernel context before suspend, we cannot
-	 * guarantee that the context image is complete. So let's just reset
-	 * it and start again.
-	 */
-	intel_gt_resume(i915);
-
-	if (i915_gem_init_hw(i915))
-		goto err_wedged;
-
-	intel_uc_resume(i915);
-
-	/* Always reload a context for powersaving.
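-	 * (Switching to the kernel context also leaves a valid context
-	 * loaded; it is assumed here that older platforms want one in place
-	 * before they will drop into their low power states.)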
*/ - if (!i915_gem_load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - -void i915_gem_init__pm(struct drm_i915_private *i915) -{ - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - - i915->gem.pm_notifier.notifier_call = pm_notifier; - blocking_notifier_chain_register(&i915->gt.pm_notifications, - &i915->gem.pm_notifier); -} diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h deleted file mode 100644 index 6f7d5d11ac3b..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_pm.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __I915_GEM_PM_H__ -#define __I915_GEM_PM_H__ - -#include - -struct drm_i915_private; -struct work_struct; - -void i915_gem_init__pm(struct drm_i915_private *i915); - -bool i915_gem_load_power_context(struct drm_i915_private *i915); -void i915_gem_resume(struct drm_i915_private *i915); - -void i915_gem_idle_work_handler(struct work_struct *work); - -void i915_gem_suspend(struct drm_i915_private *i915); -void i915_gem_suspend_late(struct drm_i915_private *i915); - -#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c deleted file mode 100644 index 2c7aefb3e101..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright © 2008-2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "i915_drv.h"
-#include "i915_trace.h"
-
-static bool shrinker_lock(struct drm_i915_private *i915,
-			  unsigned int flags,
-			  bool *unlock)
-{
-	struct mutex *m = &i915->drm.struct_mutex;
-
-	switch (mutex_trylock_recursive(m)) {
-	case MUTEX_TRYLOCK_RECURSIVE:
-		*unlock = false;
-		return true;
-
-	case MUTEX_TRYLOCK_FAILED:
-		*unlock = false;
-		if (flags & I915_SHRINK_ACTIVE &&
-		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
-			*unlock = true;
-		return *unlock;
-
-	case MUTEX_TRYLOCK_SUCCESS:
-		*unlock = true;
-		return true;
-	}
-
-	BUG();
-}
-
-static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
-{
-	if (!unlock)
-		return;
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static bool swap_available(void)
-{
-	return get_nr_swap_pages() > 0;
-}
-
-static bool can_release_pages(struct drm_i915_gem_object *obj)
-{
-	/* Consider only shrinkable objects. */
-	if (!i915_gem_object_is_shrinkable(obj))
-		return false;
-
-	/* Only report true if by unbinding the object and putting its pages
-	 * we can actually make forward progress towards freeing physical
-	 * pages.
-	 *
-	 * If the pages are pinned for any other reason than being bound
-	 * to the GPU, simply unbinding from the GPU is not going to succeed
-	 * in releasing our pin count on the pages themselves.
-	 */
-	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
-		return false;
-
-	/* If any vma are "permanently" pinned, it will prevent us from
-	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
-	 * a permanent pin, along with a few others like the context objects.
-	 * To simplify the scan, and to avoid walking the list of vma under the
-	 * object, we just check the count of its permanently pinned vma.
-	 */
-	if (READ_ONCE(obj->pin_global))
-		return false;
-
-	/* We can only return physical pages to the system if we can either
-	 * discard the contents (because the user has marked them as being
-	 * purgeable) or if we can move their contents out to swap.
-	 */
-	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
-}
-
-static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
-{
-	if (i915_gem_object_unbind(obj) == 0)
-		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
-	return !i915_gem_object_has_pages(obj);
-}
-
-static void try_to_writeback(struct drm_i915_gem_object *obj,
-			     unsigned int flags)
-{
-	switch (obj->mm.madv) {
-	case I915_MADV_DONTNEED:
-		i915_gem_object_truncate(obj);
-	case __I915_MADV_PURGED:
-		return;
-	}
-
-	if (flags & I915_SHRINK_WRITEBACK)
-		i915_gem_object_writeback(obj);
-}
-
-/**
- * i915_gem_shrink - Shrink buffer object caches
- * @i915: i915 device
- * @target: amount of memory to make available, in pages
- * @nr_scanned: optional output for number of pages scanned (incremental)
- * @flags: control flags for selecting cache types
- *
- * This function is the main interface to the shrinker. It will try to release
- * up to @target pages of main memory backing storage from buffer objects.
- * Selection of the specific caches can be done with @flags. This is e.g. useful
- * when purgeable objects should be removed from caches preferentially.
- *
- * Note that it's not guaranteed that the released amount is actually
- * available as free system memory - the pages might still be in use due to
- * other reasons (like cpu mmaps) or the mm core has reused them before we
- * could grab them. Therefore code that needs to explicitly shrink buffer
- * object caches (e.g. to avoid deadlocks in memory reclaim) must fall back
- * to i915_gem_shrink_all().
- *
- * Also note that any kind of pinning (both per-vma address space pins and
- * backing storage pins at the buffer object level) result in the shrinker code
- * having to skip the object.
- *
- * Returns:
- * The number of pages of backing storage actually released.
- */
-unsigned long
-i915_gem_shrink(struct drm_i915_private *i915,
-		unsigned long target,
-		unsigned long *nr_scanned,
-		unsigned flags)
-{
-	const struct {
-		struct list_head *list;
-		unsigned int bit;
-	} phases[] = {
-		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
-		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
-		{ NULL, 0 },
-	}, *phase;
-	intel_wakeref_t wakeref = 0;
-	unsigned long count = 0;
-	unsigned long scanned = 0;
-	bool unlock;
-
-	if (!shrinker_lock(i915, flags, &unlock))
-		return 0;
-
-	/*
-	 * When shrinking the active list, also consider active contexts.
-	 * Active contexts are pinned until they are retired, and so can
-	 * not be simply unbound to retire and unpin their pages. To shrink
-	 * the contexts, we must wait until the gpu is idle.
-	 *
-	 * We don't care about errors here; if we cannot wait upon the GPU,
-	 * we will free as much as we can and hope to get a second chance.
-	 */
-	if (flags & I915_SHRINK_ACTIVE)
-		i915_gem_wait_for_idle(i915,
-				       I915_WAIT_LOCKED,
-				       MAX_SCHEDULE_TIMEOUT);
-
-	trace_i915_gem_shrink(i915, target, flags);
-	i915_retire_requests(i915);
-
-	/*
-	 * Unbinding of objects will require HW access; Let us not wake the
-	 * device just to recover a little memory. If absolutely necessary,
-	 * we will force the wake during oom-notifier.
-	 */
-	if (flags & I915_SHRINK_BOUND) {
-		wakeref = intel_runtime_pm_get_if_in_use(i915);
-		if (!wakeref)
-			flags &= ~I915_SHRINK_BOUND;
-	}
-
-	/*
-	 * As we may completely rewrite the (un)bound list whilst unbinding
-	 * (due to retiring requests) we have to strictly process only
-	 * one element of the list at a time, and recheck the list
-	 * on every iteration.
-	 *
-	 * In particular, we must hold a reference whilst removing the
-	 * object as we may end up waiting for and/or retiring the objects.
-	 * This might release the final reference (held by the active list)
-	 * and result in the object being freed from under us. This is
-	 * similar to the precautions the eviction code must take whilst
-	 * removing objects.
-	 *
-	 * Also note that although these lists do not hold a reference to
-	 * the object we can safely grab one here: The final object
-	 * unreferencing and the bound_list are both protected by the
-	 * dev->struct_mutex and so we won't ever be able to observe an
-	 * object on the bound_list with a reference count equal to 0.
-	 */
-	for (phase = phases; phase->list; phase++) {
-		struct list_head still_in_list;
-		struct drm_i915_gem_object *obj;
-
-		if ((flags & phase->bit) == 0)
-			continue;
-
-		INIT_LIST_HEAD(&still_in_list);
-
-		/*
-		 * We serialize our access to unreferenced objects through
-		 * the use of the struct_mutex. While the objects are not
-		 * yet freed (due to RCU then a workqueue) we still want
-		 * to be able to shrink their pages, so they remain on
-		 * the unbound/bound list until actually freed.
-		 */
-		spin_lock(&i915->mm.obj_lock);
-		while (count < target &&
-		       (obj = list_first_entry_or_null(phase->list,
-						       typeof(*obj),
-						       mm.link))) {
-			list_move_tail(&obj->mm.link, &still_in_list);
-
-			if (flags & I915_SHRINK_PURGEABLE &&
-			    obj->mm.madv != I915_MADV_DONTNEED)
-				continue;
-
-			if (flags & I915_SHRINK_VMAPS &&
-			    !is_vmalloc_addr(obj->mm.mapping))
-				continue;
-
-			if (!(flags & I915_SHRINK_ACTIVE) &&
-			    (i915_gem_object_is_active(obj) ||
-			     i915_gem_object_is_framebuffer(obj)))
-				continue;
-
-			if (!can_release_pages(obj))
-				continue;
-
-			spin_unlock(&i915->mm.obj_lock);
-
-			if (unsafe_drop_pages(obj)) {
-				/* May arrive from get_pages on another bo */
-				mutex_lock_nested(&obj->mm.lock,
-						  I915_MM_SHRINKER);
-				if (!i915_gem_object_has_pages(obj)) {
-					try_to_writeback(obj, flags);
-					count += obj->base.size >> PAGE_SHIFT;
-				}
-				mutex_unlock(&obj->mm.lock);
-			}
-			scanned += obj->base.size >> PAGE_SHIFT;
-
-			spin_lock(&i915->mm.obj_lock);
-		}
-		list_splice_tail(&still_in_list, phase->list);
-		spin_unlock(&i915->mm.obj_lock);
-	}
-
-	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put(i915, wakeref);
-
-	i915_retire_requests(i915);
-
-	shrinker_unlock(i915, unlock);
-
-	if (nr_scanned)
-		*nr_scanned += scanned;
-	return count;
-}
-
-/**
- * i915_gem_shrink_all - Shrink buffer object caches completely
- * @i915: i915 device
- *
- * This is a simple wrapper around i915_gem_shrink() to aggressively shrink
- * all caches completely. It also first waits for and retires all outstanding
- * requests to also be able to release backing storage for active objects.
- *
- * This should only be used in code to intentionally quiesce the gpu or as a
- * last-ditch effort when memory seems to have run out.
- *
- * Returns:
- * The number of pages of backing storage actually released.
- */
-unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-	unsigned long freed = 0;
-
-	with_intel_runtime_pm(i915, wakeref) {
-		freed = i915_gem_shrink(i915, -1UL, NULL,
-					I915_SHRINK_BOUND |
-					I915_SHRINK_UNBOUND |
-					I915_SHRINK_ACTIVE);
-	}
-
-	return freed;
-}
-
-static unsigned long
-i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
-{
-	struct drm_i915_private *i915 =
-		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_i915_gem_object *obj;
-	unsigned long num_objects = 0;
-	unsigned long count = 0;
-
-	spin_lock(&i915->mm.obj_lock);
-	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
-		if (can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
-
-	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
-		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
-	spin_unlock(&i915->mm.obj_lock);
-
-	/* Update our preferred vmscan batch size for the next pass.
-	 * Our rough guess for an effective batch size is roughly 2
-	 * available GEM objects worth of pages. That is, we don't want
-	 * the shrinker to fire until it is worth the cost of freeing an
-	 * entire GEM object.
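-	 * The update below is, in effect, a running average of the previous
-	 * batch value and the new estimate, floored at the default
-	 * SHRINK_BATCH of 128:
-	 *
-	 *	batch = max((batch + avg) / 2, 128);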
- */ - if (num_objects) { - unsigned long avg = 2 * count / num_objects; - - i915->mm.shrinker.batch = - max((i915->mm.shrinker.batch + avg) >> 1, - 128ul /* default SHRINK_BATCH */); - } - - return count; -} - -static unsigned long -i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct drm_i915_private *i915 = - container_of(shrinker, struct drm_i915_private, mm.shrinker); - unsigned long freed; - bool unlock; - - sc->nr_scanned = 0; - - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - - freed = i915_gem_shrink(i915, - sc->nr_to_scan, - &sc->nr_scanned, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE | - I915_SHRINK_WRITEBACK); - if (sc->nr_scanned < sc->nr_to_scan) - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(i915, wakeref) { - freed += i915_gem_shrink(i915, - sc->nr_to_scan - sc->nr_scanned, - &sc->nr_scanned, - I915_SHRINK_ACTIVE | - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - } - } - - shrinker_unlock(i915, unlock); - - return sc->nr_scanned ? freed : SHRINK_STOP; -} - -static int -i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) -{ - struct drm_i915_private *i915 = - container_of(nb, struct drm_i915_private, mm.oom_notifier); - struct drm_i915_gem_object *obj; - unsigned long unevictable, bound, unbound, freed_pages; - intel_wakeref_t wakeref; - - freed_pages = 0; - with_intel_runtime_pm(i915, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); - - /* Because we may be allocating inside our own driver, we cannot - * assert that there are no objects with pinned pages that are not - * being pointed to by hardware. 
- */ - unbound = bound = unevictable = 0; - spin_lock(&i915->mm.obj_lock); - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) { - if (!can_release_pages(obj)) - unevictable += obj->base.size >> PAGE_SHIFT; - else - unbound += obj->base.size >> PAGE_SHIFT; - } - list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { - if (!can_release_pages(obj)) - unevictable += obj->base.size >> PAGE_SHIFT; - else - bound += obj->base.size >> PAGE_SHIFT; - } - spin_unlock(&i915->mm.obj_lock); - - if (freed_pages || unbound || bound) - pr_info("Purging GPU memory, %lu pages freed, " - "%lu pages still pinned.\n", - freed_pages, unevictable); - - *(unsigned long *)ptr += freed_pages; - return NOTIFY_DONE; -} - -static int -i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) -{ - struct drm_i915_private *i915 = - container_of(nb, struct drm_i915_private, mm.vmap_notifier); - struct i915_vma *vma, *next; - unsigned long freed_pages = 0; - intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; - - /* Force everything onto the inactive lists */ - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT)) - goto out; - - with_intel_runtime_pm(i915, wakeref) - freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_VMAPS); - - /* We also want to clear any cached iomaps as they wrap vmap */ - mutex_lock(&i915->ggtt.vm.mutex); - list_for_each_entry_safe(vma, next, - &i915->ggtt.vm.bound_list, vm_link) { - unsigned long count = vma->node.size >> PAGE_SHIFT; - - if (!vma->iomap || i915_vma_is_active(vma)) - continue; - - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) - freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); - } - mutex_unlock(&i915->ggtt.vm.mutex); - -out: - shrinker_unlock(i915, unlock); - - *(unsigned long *)ptr += freed_pages; - return NOTIFY_DONE; -} - -/** - * i915_gem_shrinker_register - Register the i915 shrinker - * @i915: i915 device - * - * This function registers and sets up the i915 shrinker and OOM handler. - */ -void i915_gem_shrinker_register(struct drm_i915_private *i915) -{ - i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan; - i915->mm.shrinker.count_objects = i915_gem_shrinker_count; - i915->mm.shrinker.seeks = DEFAULT_SEEKS; - i915->mm.shrinker.batch = 4096; - WARN_ON(register_shrinker(&i915->mm.shrinker)); - - i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; - WARN_ON(register_oom_notifier(&i915->mm.oom_notifier)); - - i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap; - WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier)); -} - -/** - * i915_gem_shrinker_unregister - Unregisters the i915 shrinker - * @i915: i915 device - * - * This function unregisters the i915 shrinker and OOM handler. 
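- * It also registers the vmap purge notifier, which is used to reclaim
- * the cached iomaps that wrap vmap space.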
- */ -void i915_gem_shrinker_unregister(struct drm_i915_private *i915) -{ - WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier)); - WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier)); - unregister_shrinker(&i915->mm.shrinker); -} - -void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, - struct mutex *mutex) -{ - bool unlock = false; - - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) { - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_NORMAL, 0, _RET_IP_); - unlock = true; - } - - fs_reclaim_acquire(GFP_KERNEL); - - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. - */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); - mutex_release(&mutex->dep_map, 0, _RET_IP_); - - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - - fs_reclaim_release(GFP_KERNEL); - - if (unlock) - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); -} diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c deleted file mode 100644 index 0a8082cfc761..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ /dev/null @@ -1,721 +0,0 @@ -/* - * Copyright © 2008-2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * Chris Wilson - * - */ - -#include -#include "i915_drv.h" - -/* - * The BIOS typically reserves some of the system's memory for the exclusive - * use of the integrated graphics. This memory is no longer available for - * use by the OS and so the user finds that his system has less memory - * available than he put in. We refer to this memory as stolen. - * - * The BIOS will allocate its framebuffer from the stolen memory. Our - * goal is try to reuse that object for our own fbcon which must always - * be available for panics. Anything else we can reuse the stolen memory - * for is a boon. 
- */ - -int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, u64 size, - unsigned alignment, u64 start, u64 end) -{ - int ret; - - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return -ENODEV; - - /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8 && start < 4096) - start = 4096; - - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, - size, alignment, 0, - start, end, DRM_MM_INSERT_BEST); - mutex_unlock(&dev_priv->mm.stolen_lock); - - return ret; -} - -int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv, - struct drm_mm_node *node, u64 size, - unsigned alignment) -{ - return i915_gem_stolen_insert_node_in_range(dev_priv, node, size, - alignment, 0, U64_MAX); -} - -void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, - struct drm_mm_node *node) -{ - mutex_lock(&dev_priv->mm.stolen_lock); - drm_mm_remove_node(node); - mutex_unlock(&dev_priv->mm.stolen_lock); -} - -static int i915_adjust_stolen(struct drm_i915_private *dev_priv, - struct resource *dsm) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct resource *r; - - if (dsm->start == 0 || dsm->end <= dsm->start) - return -EINVAL; - - /* - * TODO: We have yet to encounter the case where the GTT wasn't at the - * end of stolen. With that assumption we could simplify this. - */ - - /* Make sure we don't clobber the GTT if it's within stolen memory */ - if (INTEL_GEN(dev_priv) <= 4 && - !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { - struct resource stolen[2] = {*dsm, *dsm}; - struct resource ggtt_res; - resource_size_t ggtt_start; - - ggtt_start = I915_READ(PGTBL_CTL); - if (IS_GEN(dev_priv, 4)) - ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) | - (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; - else - ggtt_start &= PGTBL_ADDRESS_LO_MASK; - - ggtt_res = - (struct resource) DEFINE_RES_MEM(ggtt_start, - ggtt_total_entries(ggtt) * 4); - - if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end) - stolen[0].end = ggtt_res.start; - if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end) - stolen[1].start = ggtt_res.end; - - /* Pick the larger of the two chunks */ - if (resource_size(&stolen[0]) > resource_size(&stolen[1])) - *dsm = stolen[0]; - else - *dsm = stolen[1]; - - if (stolen[0].start != stolen[1].start || - stolen[0].end != stolen[1].end) { - DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res); - DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm); - } - } - - /* - * Verify that nothing else uses this physical address. Stolen - * memory should be reserved by the BIOS and hidden from the - * kernel. So if the region is already marked as busy, something - * is seriously wrong. - */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, - resource_size(dsm), - "Graphics Stolen Memory"); - if (r == NULL) { - /* - * One more attempt but this time requesting region from - * start + 1, as we have seen that this resolves the region - * conflict with the PCI Bus. - * This is a BIOS w/a: Some BIOS wrap stolen in the root - * PCI bus, but have an off-by-one error. Hence retry the - * reservation starting from 1 instead of 0. - * There's also BIOS with off-by-one on the other end. - */ - r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, - resource_size(dsm) - 2, - "Graphics Stolen Memory"); - /* - * GEN3 firmware likes to smash pci bridges into the stolen - * range. Apparently this works.
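The trimming in i915_adjust_stolen() above is easier to see with plain integers: cut the GTT window out of the stolen region and keep the larger surviving chunk. A userspace sketch using half-open ranges (the kernel's struct resource uses inclusive ends; everything here is a stand-in):

/* Remove [gtt.start, gtt.end) from [dsm.start, dsm.end) and keep the
 * larger remaining piece, mirroring the stolen[2] logic above. */
#include <stdio.h>

struct range { unsigned long long start, end; /* [start, end) */ };

static unsigned long long range_size(struct range r)
{
	return r.end > r.start ? r.end - r.start : 0;
}

static struct range trim_pick_larger(struct range dsm, struct range gtt)
{
	struct range lo = dsm, hi = dsm;

	if (gtt.start >= dsm.start && gtt.start < dsm.end)
		lo.end = gtt.start;	/* chunk below the GTT */
	if (gtt.end > dsm.start && gtt.end <= dsm.end)
		hi.start = gtt.end;	/* chunk above the GTT */

	return range_size(lo) > range_size(hi) ? lo : hi;
}

int main(void)
{
	struct range dsm = { 0x80000000ULL, 0x90000000ULL };
	struct range gtt = { 0x8fe00000ULL, 0x90000000ULL }; /* at the top */
	struct range r = trim_pick_larger(dsm, gtt);

	printf("kept [%llx, %llx)\n", r.start, r.end);
	return 0;
}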
- */ - if (r == NULL && !IS_GEN(dev_priv, 3)) { - DRM_ERROR("conflict detected with stolen region: %pR\n", - dsm); - - return -EBUSY; - } - } - - return 0; -} - -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) -{ - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return; - - drm_mm_takedown(&dev_priv->mm.stolen); -} - -static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", - IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); - - if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) - return; - - /* - * Whether ILK really reuses the ELK register for this is unclear. - * Let's see if we catch anyone with this supposedly enabled on ILK. - */ - WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n", - reg_val); - - if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) - return; - - *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; - WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); - - *size = stolen_top - *base; -} - -static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) { - case GEN6_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN6_STOLEN_RESERVED_512K: - *size = 512 * 1024; - break; - case GEN6_STOLEN_RESERVED_256K: - *size = 256 * 1024; - break; - case GEN6_STOLEN_RESERVED_128K: - *size = 128 * 1024; - break; - default: - *size = 1024 * 1024; - MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { - default: - MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); - /* fall through */ - case GEN7_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - } - - /* - * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the - * reserved location as (top - size). 
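All the *_get_stolen_reserved() helpers above follow one decode pattern: a single register supplies an enable bit, a base-address field and an encoded size. A sketch of that pattern with invented mask values; the real per-generation layouts are the masks used in the code above:

/* Decode an imaginary STOLEN_RESERVED-style register. The ENABLE,
 * ADDR and SIZE fields below are examples, not the hardware layout. */
#include <stdio.h>
#include <stdint.h>

#define RSVD_ENABLE	(1u << 0)
#define RSVD_ADDR_MASK	0xfff00000u	/* example: base, 1MiB aligned */
#define RSVD_SIZE_MASK	(3u << 3)	/* example: 2-bit size code */

static int decode(uint32_t reg, uint64_t *base, uint64_t *size)
{
	static const uint64_t sizes[] = {
		1 << 20, 512 << 10, 256 << 10, 128 << 10
	};

	if (!(reg & RSVD_ENABLE))
		return -1;		/* nothing reserved */

	*base = reg & RSVD_ADDR_MASK;
	*size = sizes[(reg & RSVD_SIZE_MASK) >> 3];
	return 0;
}

int main(void)
{
	uint64_t base, size;

	if (!decode(0x7ff00009u, &base, &size))
		printf("reserved base=%#llx size=%llu KiB\n",
		       (unsigned long long)base,
		       (unsigned long long)(size >> 10));
	return 0;
}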
- */ - *base = stolen_top - *size; -} - -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { - case GEN7_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN7_STOLEN_RESERVED_256K: - *size = 256 * 1024; - break; - default: - *size = 1024 * 1024; - MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { - case GEN8_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_2M: - *size = 2 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_4M: - *size = 4 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_8M: - *size = 8 * 1024 * 1024; - break; - default: - *size = 8 * 1024 * 1024; - MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); - } -} - -static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top = dev_priv->dsm.end + 1; - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - - if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) - return; - - if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) - return; - - *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - *size = stolen_top - *base; -} - -static void icl_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, - resource_size_t *size) -{ - u64 reg_val = I915_READ64(GEN6_STOLEN_RESERVED); - - DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val); - - *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK; - - switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) { - case GEN8_STOLEN_RESERVED_1M: - *size = 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_2M: - *size = 2 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_4M: - *size = 4 * 1024 * 1024; - break; - case GEN8_STOLEN_RESERVED_8M: - *size = 8 * 1024 * 1024; - break; - default: - *size = 8 * 1024 * 1024; - MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK); - } -} - -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) -{ - resource_size_t reserved_base, stolen_top; - resource_size_t reserved_total, reserved_size; - - mutex_init(&dev_priv->mm.stolen_lock); - - if (intel_vgpu_active(dev_priv)) { - DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); - return 0; - } - - if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) { - DRM_INFO("DMAR active, disabling use of stolen memory\n"); - return 0; - } - - if (resource_size(&intel_graphics_stolen_res) == 0) - return 0; - - dev_priv->dsm = intel_graphics_stolen_res; - - if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) - return 0; - - GEM_BUG_ON(dev_priv->dsm.start == 0); - GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); - - stolen_top = dev_priv->dsm.end + 1; - reserved_base = stolen_top; - reserved_size = 0; - - switch (INTEL_GEN(dev_priv)) { - 
case 2: - case 3: - break; - case 4: - if (!IS_G4X(dev_priv)) - break; - /* fall through */ - case 5: - g4x_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 6: - gen6_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 7: - if (IS_VALLEYVIEW(dev_priv)) - vlv_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - else - gen7_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 8: - case 9: - case 10: - if (IS_LP(dev_priv)) - chv_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - else - bdw_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); - break; - case 11: - default: - icl_get_stolen_reserved(dev_priv, &reserved_base, - &reserved_size); - break; - } - - /* - * Our expectation is that the reserved space is at the top of the - * stolen region and *never* at the bottom. If we see !reserved_base, - * it likely means we failed to read the registers correctly. - */ - if (!reserved_base) { - DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", - &reserved_base, &reserved_size); - reserved_base = stolen_top; - reserved_size = 0; - } - - dev_priv->dsm_reserved = - (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); - - if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { - DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", - &dev_priv->dsm_reserved, &dev_priv->dsm); - return 0; - } - - /* It is possible for the reserved area to end before the end of stolen - * memory, so just consider the start. */ - reserved_total = stolen_top - reserved_base; - - DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total; - - /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); - - return 0; -} - -static struct sg_table * -i915_pages_create_for_stolen(struct drm_device *dev, - resource_size_t offset, resource_size_t size) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct sg_table *st; - struct scatterlist *sg; - - GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); - - /* We hide that we have no struct page backing our stolen object - * by wrapping the contiguous physical allocation with a fake - * dma mapping in a single scatterlist. 
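The usable-size arithmetic near the end of i915_gem_init_stolen() above, in isolation: since the reserved area sits at the top of stolen, only its start matters. All values below are made up:

/* reserved_total runs from reserved_base to the top of stolen; what
 * is left over becomes the drm_mm range for stolen allocations. */
#include <stdio.h>

int main(void)
{
	unsigned long long dsm_start = 0x80000000ULL;
	unsigned long long dsm_end = 0x83ffffffULL;	  /* inclusive, 64MiB */
	unsigned long long reserved_base = 0x83f00000ULL; /* top 1MiB */

	unsigned long long stolen_top = dsm_end + 1;
	unsigned long long reserved_total = stolen_top - reserved_base;
	unsigned long long dsm_size = dsm_end - dsm_start + 1;
	unsigned long long usable = dsm_size - reserved_total;

	printf("total %lluK, reserved %lluK, usable %lluK\n",
	       dsm_size >> 10, reserved_total >> 10, usable >> 10);
	return 0;
}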
- */ - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (st == NULL) - return ERR_PTR(-ENOMEM); - - if (sg_alloc_table(st, 1, GFP_KERNEL)) { - kfree(st); - return ERR_PTR(-ENOMEM); - } - - sg = st->sgl; - sg->offset = 0; - sg->length = size; - - sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; - sg_dma_len(sg) = size; - - return st; -} - -static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) -{ - struct sg_table *pages = - i915_pages_create_for_stolen(obj->base.dev, - obj->stolen->start, - obj->stolen->size); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - __i915_gem_object_set_pages(obj, pages, obj->stolen->size); - - return 0; -} - -static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - /* Should only be called from i915_gem_object_release_stolen() */ - sg_free_table(pages); - kfree(pages); -} - -static void -i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); - - GEM_BUG_ON(!stolen); - - __i915_gem_object_unpin_pages(obj); - - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); -} - -static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { - .get_pages = i915_gem_object_get_pages_stolen, - .put_pages = i915_gem_object_put_pages_stolen, - .release = i915_gem_object_release_stolen, -}; - -static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; - - drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); - i915_gem_object_init(obj, &i915_gem_object_stolen_ops); - - obj->stolen = stolen; - obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - cache_level = HAS_LLC(dev_priv) ? 
I915_CACHE_LLC : I915_CACHE_NONE; - i915_gem_object_set_cache_coherency(obj, cache_level); - - if (i915_gem_object_pin_pages(obj)) - goto cleanup; - - return obj; - -cleanup: - i915_gem_object_free(obj); - return NULL; -} - -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) -{ - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - int ret; - - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - if (size == 0) - return NULL; - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return NULL; - - ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); - if (ret) { - kfree(stolen); - return NULL; - } - - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; - - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); - return NULL; -} - -struct drm_i915_gem_object * -i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - resource_size_t stolen_offset, - resource_size_t gtt_offset, - resource_size_t size) -{ - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_object *obj; - struct drm_mm_node *stolen; - struct i915_vma *vma; - int ret; - - if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", - &stolen_offset, >t_offset, &size); - - /* KISS and expect everything to be page-aligned */ - if (WARN_ON(size == 0) || - WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || - WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; - - stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); - if (!stolen) - return NULL; - - stolen->start = stolen_offset; - stolen->size = size; - mutex_lock(&dev_priv->mm.stolen_lock); - ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); - mutex_unlock(&dev_priv->mm.stolen_lock); - if (ret) { - DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); - kfree(stolen); - return NULL; - } - - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { - DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); - i915_gem_stolen_remove_node(dev_priv, stolen); - kfree(stolen); - return NULL; - } - - /* Some objects just need physical mem from stolen space */ - if (gtt_offset == I915_GTT_OFFSET_NONE) - return obj; - - ret = i915_gem_object_pin_pages(obj); - if (ret) - goto err; - - vma = i915_vma_instance(obj, &ggtt->vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_pages; - } - - /* To simplify the initialisation sequence between KMS and GTT, - * we allow construction of the stolen object prior to - * setting up the GTT space. The actual reservation will occur - * later. 
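The preallocated-object path above is a textbook allocate/reserve/unwind ladder: every step that can fail releases exactly what already succeeded, in reverse order. A reduced sketch of that shape, with malloc and a flag standing in for the kernel allocator and drm_mm reservation:

/* Allocate a node, reserve its range, do further setup; on any
 * failure, unwind only the completed steps via the goto ladder. */
#include <stdio.h>
#include <stdlib.h>

struct node { unsigned long long start, size; int reserved; };

static int reserve_range(struct node *n) { n->reserved = 1; return 0; }
static void release_range(struct node *n) { n->reserved = 0; }
static int pin_pages(struct node *n) { (void)n; return 0; }

static struct node *create_at(unsigned long long start,
			      unsigned long long size)
{
	struct node *n;

	n = calloc(1, sizeof(*n));
	if (!n)
		return NULL;

	n->start = start;
	n->size = size;
	if (reserve_range(n))
		goto err_free;

	if (pin_pages(n))
		goto err_release;

	return n;

err_release:
	release_range(n);
err_free:
	free(n);
	return NULL;
}

int main(void)
{
	struct node *n = create_at(0x100000, 0x10000);

	printf("%s\n", n ? "created" : "failed");
	free(n);
	return 0;
}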
- */ - ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, - size, gtt_offset, obj->cache_level, - 0); - if (ret) { - DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); - goto err_pages; - } - - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - - vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; - __i915_vma_set_map_and_fenceable(vma); - - mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); - mutex_unlock(&ggtt->vm.mutex); - - spin_lock(&dev_priv->mm.obj_lock); - list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); - obj->bind_count++; - spin_unlock(&dev_priv->mm.obj_lock); - - return obj; - -err_pages: - i915_gem_object_unpin_pages(obj); -err: - i915_gem_object_put(obj); - return NULL; -} diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c deleted file mode 100644 index 86d6d92ccbc9..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ /dev/null @@ -1,460 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include -#include -#include - -#include "gem/i915_gem_ioctls.h" - -#include "i915_drv.h" - -/** - * DOC: buffer object tiling - * - * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the userspace - * interface to declare fence register requirements. - * - * In principle GEM doesn't care at all about the internal data layout of an - * object, and hence it also doesn't care about tiling or swizzling. There are - * two exceptions: - * - * - For X and Y tiling the hardware provides detilers for CPU access, so called - * fences. Since there's only a limited amount of them the kernel must manage - * these, and therefore userspace must tell the kernel the object tiling if it - * wants to use fences for detiling. - * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which - * depends upon the physical page frame number. When swapping such objects the - * page frame number might change and the kernel must be able to fix this up - * and hence needs to know the tiling. Note that on a subset of platforms with - * asymmetric memory channel population the swizzling pattern changes in an - * unknown way, and for those the kernel simply forbids swapping completely.
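To make "bit 6 swizzling" concrete: in the 9_10 mode, address bits 9 and 10 are XORed into bit 6 of the address the hardware uses, so CPU code touching the backing pages must apply the same fixup to find a byte. A minimal sketch of that address transform (treat the helper as illustrative, not the driver's exact routine):

/* Swizzle fixup for the BIT_6_SWIZZLE_9_10 pattern: fold bits 9 and
 * 10 into bit 6. Shifting by 3 moves bit 9 to bit 6; by 4, bit 10. */
#include <stdio.h>

static unsigned long swizzle_bit6_9_10(unsigned long addr)
{
	return addr ^ (((addr >> 3) ^ (addr >> 4)) & (1ul << 6));
}

int main(void)
{
	unsigned long addr = 0x600;	/* bits 9 and 10 set: they cancel */
	printf("%#lx -> %#lx\n", addr, swizzle_bit6_9_10(addr));

	addr = 0x200;			/* only bit 9 set: bit 6 flips */
	printf("%#lx -> %#lx\n", addr, swizzle_bit6_9_10(addr));
	return 0;
}

This is also why bit 17 swizzling is so painful, as described above: bit 17 is a property of the physical page, so the fixup changes whenever the page is swapped out and back in.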
- * - * Since neither of these applies for new tiling layouts on modern platforms like - * W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y tiled. - * Anything else can be handled in userspace entirely without the kernel's - * involvement. - */ - -/** - * i915_gem_fence_size - required global GTT size for a fence - * @i915: i915 device - * @size: object size - * @tiling: tiling mode - * @stride: tiling stride - * - * Return the required global GTT size for a fence (view of a tiled object), - * taking into account potential fence register mapping. - */ -u32 i915_gem_fence_size(struct drm_i915_private *i915, - u32 size, unsigned int tiling, unsigned int stride) -{ - u32 ggtt_size; - - GEM_BUG_ON(!size); - - if (tiling == I915_TILING_NONE) - return size; - - GEM_BUG_ON(!stride); - - if (INTEL_GEN(i915) >= 4) { - stride *= i915_gem_tile_height(tiling); - GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE)); - return roundup(size, stride); - } - - /* Previous chips need a power-of-two fence region when tiling */ - if (IS_GEN(i915, 3)) - ggtt_size = 1024*1024; - else - ggtt_size = 512*1024; - - while (ggtt_size < size) - ggtt_size <<= 1; - - return ggtt_size; -} - -/** - * i915_gem_fence_alignment - required global GTT alignment for a fence - * @i915: i915 device - * @size: object size - * @tiling: tiling mode - * @stride: tiling stride - * - * Return the required global GTT alignment for a fence (a view of a tiled - * object), taking into account potential fence register mapping. - */ -u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size, - unsigned int tiling, unsigned int stride) -{ - GEM_BUG_ON(!size); - - /* - * Minimum alignment is 4k (GTT page size), but might be greater - * if a fence register is needed for the object. - */ - if (tiling == I915_TILING_NONE) - return I915_GTT_MIN_ALIGNMENT; - - if (INTEL_GEN(i915) >= 4) - return I965_FENCE_PAGE; - - /* - * Previous chips need to be aligned to the size of the smallest - * fence register that can contain the object.
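The two sizing regimes in i915_gem_fence_size() above, side by side: gen4+ rounds the object up to a whole number of tile rows, older parts grow the fence region to the next power of two. The tile height and the size floors below are illustrative stand-ins for the real per-mode values:

/* fence_size_gen4: roundup(size, stride * tile_height), as on gen4+.
 * fence_size_gen3: power-of-two growth from a fixed floor, as on
 * older chips. Constants are examples only. */
#include <stdio.h>

static unsigned int fence_size_gen4(unsigned int size, unsigned int stride,
				    unsigned int tile_height)
{
	unsigned int row = stride * tile_height;

	return (size + row - 1) / row * row;	/* roundup(size, row) */
}

static unsigned int fence_size_gen3(unsigned int size)
{
	unsigned int ggtt_size = 1024 * 1024;	/* example floor */

	while (ggtt_size < size)
		ggtt_size <<= 1;

	return ggtt_size;
}

int main(void)
{
	printf("gen4+: %u\n", fence_size_gen4(300000, 512, 8)); /* 303104 */
	printf("gen3 : %u\n", fence_size_gen3(3000000));	/* 4194304 */
	return 0;
}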
- */ - return i915_gem_fence_size(i915, size, tiling, stride); -} - -/* Check pitch constraints for all chips & tiling formats */ -static bool -i915_tiling_ok(struct drm_i915_gem_object *obj, - unsigned int tiling, unsigned int stride) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned int tile_width; - - /* Linear is always fine */ - if (tiling == I915_TILING_NONE) - return true; - - if (tiling > I915_TILING_LAST) - return false; - - /* check maximum stride & object size */ - /* i965+ stores the end address of the gtt mapping in the fence - * reg, so don't bother to check the size */ - if (INTEL_GEN(i915) >= 7) { - if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL) - return false; - } else if (INTEL_GEN(i915) >= 4) { - if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) - return false; - } else { - if (stride > 8192) - return false; - - if (!is_power_of_2(stride)) - return false; - } - - if (IS_GEN(i915, 2) || - (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915))) - tile_width = 128; - else - tile_width = 512; - - if (!stride || !IS_ALIGNED(stride, tile_width)) - return false; - - return true; -} - -static bool i915_vma_fence_prepare(struct i915_vma *vma, - int tiling_mode, unsigned int stride) -{ - struct drm_i915_private *i915 = vma->vm->i915; - u32 size, alignment; - - if (!i915_vma_is_map_and_fenceable(vma)) - return true; - - size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride); - if (vma->node.size < size) - return false; - - alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride); - if (!IS_ALIGNED(vma->node.start, alignment)) - return false; - - return true; -} - -/* Make the current GTT allocation valid for the change in tiling. */ -static int -i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, - int tiling_mode, unsigned int stride) -{ - struct i915_vma *vma; - int ret; - - if (tiling_mode == I915_TILING_NONE) - return 0; - - for_each_ggtt_vma(vma, obj) { - if (i915_vma_fence_prepare(vma, tiling_mode, stride)) - continue; - - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } - - return 0; -} - -int -i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, - unsigned int tiling, unsigned int stride) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_vma *vma; - int err; - - /* Make sure we don't cross-contaminate obj->tiling_and_stride */ - BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); - - GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); - GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); - - if ((tiling | stride) == obj->tiling_and_stride) - return 0; - - if (i915_gem_object_is_framebuffer(obj)) - return -EBUSY; - - /* We need to rebind the object if its current allocation - * no longer meets the alignment restrictions for its new - * tiling mode. Otherwise we can just leave it alone, but - * need to ensure that any fence register is updated before - * the next fenced (either through the GTT or by the BLT unit - * on older GPUs) access. - * - * After updating the tiling parameters, we then flag whether - * we need to update an associated fence register. Note this - * has to also include the unfenced register the GPU uses - * whilst executing a fenced command for an untiled object. - */ - - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - - i915_gem_object_lock(obj); - if (i915_gem_object_is_framebuffer(obj)) { - i915_gem_object_unlock(obj); - return -EBUSY; - } - - /* If the memory has unknown (i.e.
varying) swizzling, we pin the - * pages to prevent them being swapped out and causing corruption - * due to the change in swizzling. - */ - mutex_lock(&obj->mm.lock); - if (i915_gem_object_has_pages(obj) && - obj->mm.madv == I915_MADV_WILLNEED && - i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (tiling == I915_TILING_NONE) { - GEM_BUG_ON(!obj->mm.quirked); - __i915_gem_object_unpin_pages(obj); - obj->mm.quirked = false; - } - if (!i915_gem_object_is_tiled(obj)) { - GEM_BUG_ON(obj->mm.quirked); - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - } - mutex_unlock(&obj->mm.lock); - - for_each_ggtt_vma(vma, obj) { - vma->fence_size = - i915_gem_fence_size(i915, vma->size, tiling, stride); - vma->fence_alignment = - i915_gem_fence_alignment(i915, - vma->size, tiling, stride); - - if (vma->fence) - vma->fence->dirty = true; - } - - obj->tiling_and_stride = tiling | stride; - i915_gem_object_unlock(obj); - - /* Force the fence to be reacquired for GTT access */ - i915_gem_object_release_mmap(obj); - - /* Try to preallocate memory required to save swizzling on put-pages */ - if (i915_gem_object_needs_bit17_swizzle(obj)) { - if (!obj->bit_17) { - obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT, - GFP_KERNEL); - } - } else { - bitmap_free(obj->bit_17); - obj->bit_17 = NULL; - } - - return 0; -} - -/** - * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode - * @dev: DRM device - * @data: data pointer for the ioctl - * @file: DRM file for the ioctl call - * - * Sets the tiling mode of an object, returning the required swizzling of - * bit 6 of addresses in the object. - * - * Called by the user via ioctl. - * - * Returns: - * Zero on success, negative errno on failure. - */ -int -i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_set_tiling *args = data; - struct drm_i915_gem_object *obj; - int err; - - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; - - /* - * The tiling mode of proxy objects is handled by its generator, and - * not allowed to be changed by userspace. - */ - if (i915_gem_object_is_proxy(obj)) { - err = -ENXIO; - goto err; - } - - if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) { - err = -EINVAL; - goto err; - } - - if (args->tiling_mode == I915_TILING_NONE) { - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - args->stride = 0; - } else { - if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; - else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; - - /* Hide bit 17 swizzling from the user. This prevents old Mesa - * from aborting the application on sw fallbacks to bit 17, - * and we use the pread/pwrite bit17 paths to swizzle for it. - * If there was a user that was relying on the swizzle - * information for drm_intel_bo_map()ed reads/writes this would - * break it, but we don't have any of those. - */ - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - - /* If we can't handle the swizzling, make it untiled. 
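The pitch rules enforced by i915_tiling_ok() earlier reduce to a small predicate: newer parts only bound the pitch field, old parts additionally demand a power-of-two stride, and everything must be a multiple of the tile width. A sketch with an example pitch limit (not the exact GEN7/I965 *_FENCE_MAX_PITCH_VAL values):

/* stride_ok: simplified pitch validation. tile_width is 128 or 512
 * depending on generation and tiling mode, as in the code above. */
#include <stdio.h>

static int stride_ok(unsigned int gen, unsigned int stride,
		     unsigned int tile_width)
{
	if (gen >= 4) {
		if (stride / 128 > 0x3ff)	/* example pitch limit */
			return 0;
	} else {
		if (stride > 8192)
			return 0;
		if (stride & (stride - 1))	/* power of two? */
			return 0;
	}

	return stride && stride % tile_width == 0;
}

int main(void)
{
	printf("gen6 stride 4608: %d\n", stride_ok(6, 4608, 512)); /* 1 */
	printf("gen3 stride 4608: %d\n", stride_ok(3, 4608, 512)); /* 0 */
	printf("gen3 stride 4096: %d\n", stride_ok(3, 4096, 512)); /* 1 */
	return 0;
}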
*/ - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { - args->tiling_mode = I915_TILING_NONE; - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - args->stride = 0; - } - } - - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); - - /* We have to maintain this existing ABI... */ - args->stride = i915_gem_object_get_stride(obj); - args->tiling_mode = i915_gem_object_get_tiling(obj); - -err: - i915_gem_object_put(obj); - return err; -} - -/** - * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode - * @dev: DRM device - * @data: data pointer for the ioctl - * @file: DRM file for the ioctl call - * - * Returns the current tiling mode and required bit 6 swizzling for the object. - * - * Called by the user via ioctl. - * - * Returns: - * Zero on success, negative errno on failure. - */ -int -i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct drm_i915_gem_get_tiling *args = data; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - int err = -ENOENT; - - rcu_read_lock(); - obj = i915_gem_object_lookup_rcu(file, args->handle); - if (obj) { - args->tiling_mode = - READ_ONCE(obj->tiling_and_stride) & TILING_MASK; - err = 0; - } - rcu_read_unlock(); - if (unlikely(err)) - return err; - - switch (args->tiling_mode) { - case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; - break; - case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; - break; - default: - case I915_TILING_NONE: - args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; - break; - } - - /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ - if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) - args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN; - else - args->phys_swizzle_mode = args->swizzle_mode; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9; - if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) - args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - - return 0; -} diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c deleted file mode 100644 index 2c1b6bb7a040..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright © 2012-2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include -#include -#include -#include - -#include - -#include "gem/i915_gem_ioctls.h" - -#include "i915_drv.h" -#include "i915_trace.h" -#include "intel_drv.h" - -struct i915_mm_struct { - struct mm_struct *mm; - struct drm_i915_private *i915; - struct i915_mmu_notifier *mn; - struct hlist_node node; - struct kref kref; - struct work_struct work; -}; - -#if defined(CONFIG_MMU_NOTIFIER) -#include - -struct i915_mmu_notifier { - spinlock_t lock; - struct hlist_node node; - struct mmu_notifier mn; - struct rb_root_cached objects; - struct i915_mm_struct *mm; -}; - -struct i915_mmu_object { - struct i915_mmu_notifier *mn; - struct drm_i915_gem_object *obj; - struct interval_tree_node it; -}; - -static void add_object(struct i915_mmu_object *mo) -{ - GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb)); - interval_tree_insert(&mo->it, &mo->mn->objects); -} - -static void del_object(struct i915_mmu_object *mo) -{ - if (RB_EMPTY_NODE(&mo->it.rb)) - return; - - interval_tree_remove(&mo->it, &mo->mn->objects); - RB_CLEAR_NODE(&mo->it.rb); -} - -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ - struct i915_mmu_object *mo = obj->userptr.mmu_object; - - /* - * During mm_invalidate_range we need to cancel any userptr that - * overlaps the range being invalidated. Doing so requires the - * struct_mutex, and that risks recursion. In order to cause - * recursion, the user must alias the userptr address space with - * a GTT mmapping (possible with a MAP_FIXED) - then when we have - * to invalidate that mmapping, mm_invalidate_range is called with - * the userptr address *and* the struct_mutex held. To prevent that - * we set a flag under the i915_mmu_notifier spinlock to indicate - * whether this object is valid. - */ - if (!mo) - return; - - spin_lock(&mo->mn->lock); - if (value) - add_object(mo); - else - del_object(mo); - spin_unlock(&mo->mn->lock); -} - -static int -userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, - const struct mmu_notifier_range *range) -{ - struct i915_mmu_notifier *mn = - container_of(_mn, struct i915_mmu_notifier, mn); - struct interval_tree_node *it; - struct mutex *unlock = NULL; - unsigned long end; - int ret = 0; - - if (RB_EMPTY_ROOT(&mn->objects.rb_root)) - return 0; - - /* interval ranges are inclusive, but invalidate range is exclusive */ - end = range->end - 1; - - spin_lock(&mn->lock); - it = interval_tree_iter_first(&mn->objects, range->start, end); - while (it) { - struct drm_i915_gem_object *obj; - - if (!mmu_notifier_range_blockable(range)) { - ret = -EAGAIN; - break; - } - - /* - * The mmu_object is released late when destroying the - * GEM object so it is entirely possible to gain a - * reference on an object in the process of being freed - * since our serialisation is via the spinlock and not - * the struct_mutex - and consequently use it after it - * is freed and then double free it. To prevent that - * use-after-free we only acquire a reference on the - * object if it is not in the process of being destroyed.
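The invalidation walk above, in miniature: find every tracked range overlapping the invalidated span and cancel it, after converting the exclusive end to the tree's inclusive convention. A sorted array stands in for the interval tree here; everything is a stand-in for the driver's types:

/* Cancel all inclusive [first, last] ranges that overlap the
 * exclusive [start, end) invalidation window. */
#include <stdio.h>

struct mo { unsigned long first, last; int active; }; /* inclusive */

static void invalidate(struct mo *v, int n,
		       unsigned long start, unsigned long end)
{
	unsigned long last = end - 1;	/* exclusive -> inclusive */

	for (int i = 0; i < n; i++)
		if (v[i].first <= last && v[i].last >= start)
			v[i].active = 0;
}

int main(void)
{
	struct mo v[] = {
		{ 0x1000, 0x1fff, 1 },
		{ 0x3000, 0x5fff, 1 },
		{ 0x8000, 0x8fff, 1 },
	};

	invalidate(v, 3, 0x4000, 0x8000);	/* [0x4000, 0x8000) */

	for (int i = 0; i < 3; i++)
		printf("range %d active=%d\n", i, v[i].active);
	return 0;
}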
- */ - obj = container_of(it, struct i915_mmu_object, it)->obj; - if (!kref_get_unless_zero(&obj->base.refcount)) { - it = interval_tree_iter_next(it, range->start, end); - continue; - } - spin_unlock(&mn->lock); - - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - - ret = i915_gem_object_unbind(obj); - if (ret == 0) - ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); - i915_gem_object_put(obj); - if (ret) - goto unlock; - - spin_lock(&mn->lock); - - /* - * As we do not (yet) protect the mmu from concurrent insertion - * over this range, there is no guarantee that this search will - * terminate given a pathologic workload. - */ - it = interval_tree_iter_first(&mn->objects, range->start, end); - } - spin_unlock(&mn->lock); - -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - - return ret; - -} - -static const struct mmu_notifier_ops i915_gem_userptr_notifier = { - .invalidate_range_start = userptr_mn_invalidate_range_start, -}; - -static struct i915_mmu_notifier * -i915_mmu_notifier_create(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - - mn = kmalloc(sizeof(*mn), GFP_KERNEL); - if (mn == NULL) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&mn->lock); - mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT_CACHED; - mn->mm = mm; - - return mn; -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ - struct i915_mmu_object *mo; - - mo = fetch_and_zero(&obj->userptr.mmu_object); - if (!mo) - return; - - spin_lock(&mo->mn->lock); - del_object(mo); - spin_unlock(&mo->mn->lock); - kfree(mo); -} - -static struct i915_mmu_notifier * -i915_mmu_notifier_find(struct i915_mm_struct *mm) -{ - struct i915_mmu_notifier *mn; - int err = 0; - - mn = mm->mn; - if (mn) - return mn; - - mn = i915_mmu_notifier_create(mm); - if (IS_ERR(mn)) - err = PTR_ERR(mn); - - down_write(&mm->mm->mmap_sem); - mutex_lock(&mm->i915->mm_lock); - if (mm->mn == NULL && !err) { - /* Protected by mmap_sem (write-lock) */ - err = __mmu_notifier_register(&mn->mn, mm->mm); - if (!err) { - /* Protected by mm_lock */ - mm->mn = fetch_and_zero(&mn); - } - } else if (mm->mn) { - /* - * Someone else raced and successfully installed the mmu - * notifier, we can cancel our own errors. - */ - err = 0; - } - mutex_unlock(&mm->i915->mm_lock); - up_write(&mm->mm->mmap_sem); - - if (mn && !IS_ERR(mn)) - kfree(mn); - - return err ? ERR_PTR(err) : mm->mn; -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - struct i915_mmu_notifier *mn; - struct i915_mmu_object *mo; - - if (flags & I915_USERPTR_UNSYNCHRONIZED) - return capable(CAP_SYS_ADMIN) ? 
0 : -EPERM; - - if (WARN_ON(obj->userptr.mm == NULL)) - return -EINVAL; - - mn = i915_mmu_notifier_find(obj->userptr.mm); - if (IS_ERR(mn)) - return PTR_ERR(mn); - - mo = kzalloc(sizeof(*mo), GFP_KERNEL); - if (!mo) - return -ENOMEM; - - mo->mn = mn; - mo->obj = obj; - mo->it.start = obj->userptr.ptr; - mo->it.last = obj->userptr.ptr + obj->base.size - 1; - RB_CLEAR_NODE(&mo->it.rb); - - obj->userptr.mmu_object = mo; - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ - if (mn == NULL) - return; - - mmu_notifier_unregister(&mn->mn, mm); - kfree(mn); -} - -#else - -static void -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value) -{ -} - -static void -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj) -{ -} - -static int -i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj, - unsigned flags) -{ - if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0) - return -ENODEV; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - return 0; -} - -static void -i915_mmu_notifier_free(struct i915_mmu_notifier *mn, - struct mm_struct *mm) -{ -} - -#endif - -static struct i915_mm_struct * -__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real) -{ - struct i915_mm_struct *mm; - - /* Protected by dev_priv->mm_lock */ - hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real) - if (mm->mm == real) - return mm; - - return NULL; -} - -static int -i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_mm_struct *mm; - int ret = 0; - - /* During release of the GEM object we hold the struct_mutex. This - * precludes us from calling mmput() at that time as that may be - * the last reference and so call exit_mmap(). exit_mmap() will - * attempt to reap the vma, and if we were holding a GTT mmap - * would then call drm_gem_vm_close() and attempt to reacquire - * the struct mutex. So in order to avoid that recursion, we have - * to defer releasing the mm reference until after we drop the - * struct_mutex, i.e. we need to schedule a worker to do the clean - * up. 
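i915_gem_userptr_init__mm_struct() above is a lookup-or-create on a refcounted cache keyed by the mm pointer: one lock guards the table, a hit takes a reference, a miss inserts a fresh entry with refcount one. The same shape with a tiny fixed table in place of the kernel hashtable and kref:

/* Lookup-or-create a refcounted entry keyed by pointer. A real
 * implementation takes a lock around both loops. */
#include <stdio.h>
#include <stdlib.h>

struct mm_entry { void *key; int refs; };

static struct mm_entry *table[8];

static struct mm_entry *get_mm(void *key)
{
	for (int i = 0; i < 8; i++)
		if (table[i] && table[i]->key == key) {
			table[i]->refs++;	/* like kref_get() */
			return table[i];
		}

	for (int i = 0; i < 8; i++)
		if (!table[i]) {
			table[i] = calloc(1, sizeof(*table[i]));
			if (!table[i])
				return NULL;
			table[i]->key = key;
			table[i]->refs = 1;	/* like kref_init() */
			return table[i];
		}

	return NULL;	/* table full */
}

int main(void)
{
	int mm;	/* any address serves as a key */
	struct mm_entry *a = get_mm(&mm);
	struct mm_entry *b = get_mm(&mm);

	printf("same entry: %d, refs: %d\n", a == b, b->refs);
	return 0;
}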
- */ - mutex_lock(&dev_priv->mm_lock); - mm = __i915_mm_struct_find(dev_priv, current->mm); - if (mm == NULL) { - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (mm == NULL) { - ret = -ENOMEM; - goto out; - } - - kref_init(&mm->kref); - mm->i915 = to_i915(obj->base.dev); - - mm->mm = current->mm; - mmgrab(current->mm); - - mm->mn = NULL; - - /* Protected by dev_priv->mm_lock */ - hash_add(dev_priv->mm_structs, - &mm->node, (unsigned long)mm->mm); - } else - kref_get(&mm->kref); - - obj->userptr.mm = mm; -out: - mutex_unlock(&dev_priv->mm_lock); - return ret; -} - -static void -__i915_mm_struct_free__worker(struct work_struct *work) -{ - struct i915_mm_struct *mm = container_of(work, typeof(*mm), work); - i915_mmu_notifier_free(mm->mn, mm->mm); - mmdrop(mm->mm); - kfree(mm); -} - -static void -__i915_mm_struct_free(struct kref *kref) -{ - struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref); - - /* Protected by dev_priv->mm_lock */ - hash_del(&mm->node); - mutex_unlock(&mm->i915->mm_lock); - - INIT_WORK(&mm->work, __i915_mm_struct_free__worker); - queue_work(mm->i915->mm.userptr_wq, &mm->work); -} - -static void -i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mm == NULL) - return; - - kref_put_mutex(&obj->userptr.mm->kref, - __i915_mm_struct_free, - &to_i915(obj->base.dev)->mm_lock); - obj->userptr.mm = NULL; -} - -struct get_pages_work { - struct work_struct work; - struct drm_i915_gem_object *obj; - struct task_struct *task; -}; - -static struct sg_table * -__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, - struct page **pvec, int num_pages) -{ - unsigned int max_segment = i915_sg_segment_size(); - struct sg_table *st; - unsigned int sg_page_sizes; - int ret; - - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); - -alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { - kfree(st); - return ERR_PTR(ret); - } - - ret = i915_gem_gtt_prepare_pages(obj, st); - if (ret) { - sg_free_table(st); - - if (max_segment > PAGE_SIZE) { - max_segment = PAGE_SIZE; - goto alloc_table; - } - - kfree(st); - return ERR_PTR(ret); - } - - sg_page_sizes = i915_sg_page_sizes(st->sgl); - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return st; -} - -static void -__i915_gem_userptr_get_pages_worker(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct drm_i915_gem_object *obj = work->obj; - const int npages = obj->base.size >> PAGE_SHIFT; - struct page **pvec; - int pinned, ret; - - ret = -ENOMEM; - pinned = 0; - - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (pvec != NULL) { - struct mm_struct *mm = obj->userptr.mm->mm; - unsigned int flags = 0; - - if (!i915_gem_object_is_readonly(obj)) - flags |= FOLL_WRITE; - - ret = -EFAULT; - if (mmget_not_zero(mm)) { - down_read(&mm->mmap_sem); - while (pinned < npages) { - ret = get_user_pages_remote - (work->task, mm, - obj->userptr.ptr + pinned * PAGE_SIZE, - npages - pinned, - flags, - pvec + pinned, NULL, NULL); - if (ret < 0) - break; - - pinned += ret; - } - up_read(&mm->mmap_sem); - mmput(mm); - } - } - - mutex_lock(&obj->mm.lock); - if (obj->userptr.work == &work->work) { - struct sg_table *pages = ERR_PTR(ret); - - if (pinned == npages) { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, - npages); - if (!IS_ERR(pages)) { - pinned = 0; - pages = NULL; - } - } - - obj->userptr.work = 
ERR_CAST(pages); - if (IS_ERR(pages)) - __i915_gem_userptr_set_active(obj, false); - } - mutex_unlock(&obj->mm.lock); - - release_pages(pvec, pinned); - kvfree(pvec); - - i915_gem_object_put(obj); - put_task_struct(work->task); - kfree(work); -} - -static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) -{ - struct get_pages_work *work; - - /* Spawn a worker so that we can acquire the - * user pages without holding our mutex. Access - * to the user pages requires mmap_sem, and we have - * a strict lock ordering of mmap_sem, struct_mutex - - * we already hold struct_mutex here and so cannot - * call gup without encountering a lock inversion. - * - * Userspace will keep on repeating the operation - * (thanks to EAGAIN) until either we hit the fast - * path or the worker completes. If the worker is - * cancelled or superseded, the task is still run - * but the results ignored. (This leads to - * complications that we may have a stray object - * refcount that we need to be wary of when - * checking for existing objects during creation.) - * If the worker encounters an error, it reports - * that error back to this function through - * obj->userptr.work = ERR_PTR. - */ - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) - return ERR_PTR(-ENOMEM); - - obj->userptr.work = &work->work; - - work->obj = i915_gem_object_get(obj); - - work->task = current; - get_task_struct(work->task); - - INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); - queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work); - - return ERR_PTR(-EAGAIN); -} - -static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) -{ - const int num_pages = obj->base.size >> PAGE_SHIFT; - struct mm_struct *mm = obj->userptr.mm->mm; - struct page **pvec; - struct sg_table *pages; - bool active; - int pinned; - - /* If userspace should engineer that these pages are replaced in - * the vma between us binding this page into the GTT and completion - * of rendering... Their loss. If they change the mapping of their - * pages they need to create a new bo to point to the new vma. - * - * However, that still leaves open the possibility of the vma - * being copied upon fork. Which falls under the same userspace - * synchronisation issue as a regular bo, except that this time - * the process may not be expecting that a particular piece of - * memory is tied to the GPU. - * - * Fortunately, we can hook into the mmu_notifier in order to - * discard the page references prior to anything nasty happening - * to the vma (discard or cloning) which should prevent the more - * egregious cases from causing harm. 
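From the caller's point of view, the contract described above is simply retry-on-EAGAIN: the slow path queues a worker and fails with -EAGAIN, and the caller repeats the operation until the worker has published a result. A toy version with a counter in place of the workqueue (real userspace sees this as the ioctl restarting):

/* Spin on -EAGAIN while a pretend worker makes progress. */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

static int work_done;
static int ticks;

static int get_pages(void)
{
	if (!work_done) {
		if (++ticks >= 3)	/* pretend the worker finished */
			work_done = 1;
		return -EAGAIN;		/* try again, work is in flight */
	}
	return 0;
}

int main(void)
{
	int ret;

	while ((ret = get_pages()) == -EAGAIN)
		usleep(1000);		/* back off and retry */

	printf("done after %d attempts, ret=%d\n", ticks, ret);
	return 0;
}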
- */ - - if (obj->userptr.work) { - /* active flag should still be held for the pending work */ - if (IS_ERR(obj->userptr.work)) - return PTR_ERR(obj->userptr.work); - else - return -EAGAIN; - } - - pvec = NULL; - pinned = 0; - - if (mm == current->mm) { - pvec = kvmalloc_array(num_pages, sizeof(struct page *), - GFP_KERNEL | - __GFP_NORETRY | - __GFP_NOWARN); - if (pvec) /* defer to worker if malloc fails */ - pinned = __get_user_pages_fast(obj->userptr.ptr, - num_pages, - !i915_gem_object_is_readonly(obj), - pvec); - } - - active = false; - if (pinned < 0) { - pages = ERR_PTR(pinned); - pinned = 0; - } else if (pinned < num_pages) { - pages = __i915_gem_userptr_get_pages_schedule(obj); - active = pages == ERR_PTR(-EAGAIN); - } else { - pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages); - active = !IS_ERR(pages); - } - if (active) - __i915_gem_userptr_set_active(obj, true); - - if (IS_ERR(pages)) - release_pages(pvec, pinned); - kvfree(pvec); - - return PTR_ERR_OR_ZERO(pages); -} - -static void -i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - struct sgt_iter sgt_iter; - struct page *page; - - /* Cancel any inflight work and force them to restart their gup */ - obj->userptr.work = NULL; - __i915_gem_userptr_set_active(obj, false); - if (!pages) - return; - - __i915_gem_object_release_shmem(obj, pages, true); - i915_gem_gtt_finish_pages(obj, pages); - - for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) - set_page_dirty(page); - - mark_page_accessed(page); - put_page(page); - } - obj->mm.dirty = false; - - sg_free_table(pages); - kfree(pages); -} - -static void -i915_gem_userptr_release(struct drm_i915_gem_object *obj) -{ - i915_gem_userptr_release__mmu_notifier(obj); - i915_gem_userptr_release__mm_struct(obj); -} - -static int -i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) -{ - if (obj->userptr.mmu_object) - return 0; - - return i915_gem_userptr_init__mmu_notifier(obj, 0); -} - -static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE | - I915_GEM_OBJECT_ASYNC_CANCEL, - .get_pages = i915_gem_userptr_get_pages, - .put_pages = i915_gem_userptr_put_pages, - .dmabuf_export = i915_gem_userptr_dmabuf_export, - .release = i915_gem_userptr_release, -}; - -/* - * Creates a new mm object that wraps some normal memory from the process - * context - user memory. - * - * We impose several restrictions upon the memory being mapped - * into the GPU. - * 1. It must be page aligned (both start/end addresses, i.e ptr and size). - * 2. It must be normal system memory, not a pointer into another map of IO - * space (e.g. it must not be a GTT mmapping of another object). - * 3. We only allow a bo as large as we could in theory map into the GTT, - * that is we limit the size to the total size of the GTT. - * 4. The bo is marked as being snoopable. The backing pages are left - * accessible directly by the CPU, but reads and writes by the GPU may - * incur the cost of a snoop (unless you have an LLC architecture). - * - * Synchronisation between multiple users and the GPU is left to userspace - * through the normal set-domain-ioctl. The kernel will enforce that the - * GPU relinquishes the VMA before it is returned back to the system - * i.e. upon free(), munmap() or process termination. 
However, the userspace - * malloc() library may not immediately relinquish the VMA after free() and - * instead reuse it whilst the GPU is still reading and writing to the VMA. - * Caveat emptor. - * - * Also note, that the object created here is not currently a "first class" - * object, in that several ioctls are banned. These are the CPU access - * ioctls: mmap(), pwrite and pread. In practice, you are expected to use - * direct access via your pointer rather than use those ioctls. Another - * restriction is that we do not allow userptr surfaces to be pinned to the - * hardware and so we reject any attempt to create a framebuffer out of a - * userptr. - * - * If you think this is a good interface to use to pass GPU memory between - * drivers, please use dma-buf instead. In fact, wherever possible use - * dma-buf instead. - */ -int -i915_gem_userptr_ioctl(struct drm_device *dev, - void *data, - struct drm_file *file) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_userptr *args = data; - struct drm_i915_gem_object *obj; - int ret; - u32 handle; - - if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { - /* We cannot support coherent userptr objects on hw without - * LLC and broken snooping. - */ - return -ENODEV; - } - - if (args->flags & ~(I915_USERPTR_READ_ONLY | - I915_USERPTR_UNSYNCHRONIZED)) - return -EINVAL; - - if (!args->user_size) - return -EINVAL; - - if (offset_in_page(args->user_ptr | args->user_size)) - return -EINVAL; - - if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size)) - return -EFAULT; - - if (args->flags & I915_USERPTR_READ_ONLY) { - struct i915_hw_ppgtt *ppgtt; - - /* - * On almost all of the older hw, we cannot tell the GPU that - * a page is readonly. - */ - ppgtt = dev_priv->kernel_context->ppgtt; - if (!ppgtt || !ppgtt->vm.has_read_only) - return -ENODEV; - } - - obj = i915_gem_object_alloc(); - if (obj == NULL) - return -ENOMEM; - - drm_gem_private_object_init(dev, &obj->base, args->user_size); - i915_gem_object_init(obj, &i915_gem_userptr_ops); - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - obj->userptr.ptr = args->user_ptr; - if (args->flags & I915_USERPTR_READ_ONLY) - i915_gem_object_set_readonly(obj); - - /* And keep a pointer to the current->mm for resolving the user pages - * at binding. This means that we need to hook into the mmu_notifier - * in order to detect if the mmu is destroyed. 
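The argument screening in the ioctl above, in isolation: both the pointer and the size must be page aligned, the size non-zero, and the flags restricted to the known set. Note the trick of OR-ing pointer and size so one alignment test covers both. The flag values below mirror the uapi, but treat this as a sketch:

/* Validate userptr-style arguments the way the ioctl above does. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_OFFS_MASK	4095ull		/* 4KiB pages assumed */
#define FLAG_READ_ONLY	(1u << 0)
#define FLAG_UNSYNC	(1u << 31)

static int userptr_args_ok(uint64_t ptr, uint64_t size, uint32_t flags)
{
	if (flags & ~(FLAG_READ_ONLY | FLAG_UNSYNC))
		return 0;
	if (!size)
		return 0;
	if ((ptr | size) & PAGE_OFFS_MASK)	/* one test covers both */
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", userptr_args_ok(0x7f0000001000ull, 8192, 0)); /* 1 */
	printf("%d\n", userptr_args_ok(0x7f0000001004ull, 8192, 0)); /* 0 */
	printf("%d\n", userptr_args_ok(0x7f0000001000ull, 4097, 0)); /* 0 */
	return 0;
}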
- */ - ret = i915_gem_userptr_init__mm_struct(obj); - if (ret == 0) - ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags); - if (ret == 0) - ret = drm_gem_handle_create(file, &obj->base, &handle); - - /* drop reference from allocate - handle holds it now */ - i915_gem_object_put(obj); - if (ret) - return ret; - - args->handle = handle; - return 0; -} - -int i915_gem_init_userptr(struct drm_i915_private *dev_priv) -{ - mutex_init(&dev_priv->mm_lock); - hash_init(dev_priv->mm_structs); - - dev_priv->mm.userptr_wq = - alloc_workqueue("i915-userptr-acquire", - WQ_HIGHPRI | WQ_UNBOUND, - 0); - if (!dev_priv->mm.userptr_wq) - return -ENOMEM; - - return 0; -} - -void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv) -{ - destroy_workqueue(dev_priv->mm.userptr_wq); -} diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/i915_gemfs.c deleted file mode 100644 index 888b7d3f04c3..000000000000 --- a/drivers/gpu/drm/i915/i915_gemfs.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include -#include -#include - -#include "i915_drv.h" -#include "i915_gemfs.h" - -int i915_gemfs_init(struct drm_i915_private *i915) -{ - struct file_system_type *type; - struct vfsmount *gemfs; - - type = get_fs_type("tmpfs"); - if (!type) - return -ENODEV; - - gemfs = kern_mount(type); - if (IS_ERR(gemfs)) - return PTR_ERR(gemfs); - - /* - * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most - * likely 2M. Note that within_size may overallocate huge-pages, if say - * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under - * memory pressure shmem should split any huge-pages which can be - * shrunk. - */ - - if (has_transparent_hugepage()) { - struct super_block *sb = gemfs->mnt_sb; - /* FIXME: Disabled until we get W/A for read BW issue. 
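Seen from userspace, the gemfs setup amounts to a private tmpfs instance mounted with a transparent-hugepage policy. "huge=" is a real tmpfs mount option (never/always/within_size/advise); the mount point below is just an example path and the call needs root:

/* Mount a tmpfs with an explicit THP policy, as gemfs does in-kernel
 * via kern_mount() plus remount. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("tmpfs", "/mnt/gemfs-test", "tmpfs", 0,
		  "huge=within_size") != 0) {
		perror("mount");	/* needs root and an existing dir */
		return 1;
	}
	printf("tmpfs mounted with huge=within_size\n");
	return 0;
}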
*/ - char options[] = "huge=never"; - int flags = 0; - int err; - - err = sb->s_op->remount_fs(sb, &flags, options); - if (err) { - kern_unmount(gemfs); - return err; - } - } - - i915->mm.gemfs = gemfs; - - return 0; -} - -void i915_gemfs_fini(struct drm_i915_private *i915) -{ - kern_unmount(i915->mm.gemfs); -} diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/i915/i915_gemfs.h deleted file mode 100644 index cca8bdc5b93e..000000000000 --- a/drivers/gpu/drm/i915/i915_gemfs.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __I915_GEMFS_H__ -#define __I915_GEMFS_H__ - -struct drm_i915_private; - -int i915_gemfs_init(struct drm_i915_private *i915); - -void i915_gemfs_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c index db52a58eadcc..2d5fcba98841 100644 --- a/drivers/gpu/drm/i915/i915_globals.c +++ b/drivers/gpu/drm/i915/i915_globals.c @@ -8,7 +8,7 @@ #include #include "i915_active.h" -#include "i915_gem_context.h" +#include "gem/i915_gem_context.h" #include "gem/i915_gem_object.h" #include "i915_globals.h" #include "i915_request.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 4f85cbdddb0d..c86865a34972 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -36,6 +36,8 @@ #include +#include "gem/i915_gem_context.h" + #include "i915_drv.h" #include "i915_gpu_error.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 379fd89a180f..2e33a9b4eae7 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -195,6 +195,8 @@ #include #include +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 18b34b0bf872..da1e6984a8cc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,9 @@ #include #include +#include "gem/i915_gem_context.h" +#include "gt/intel_context.h" + #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6a063d3fccee..f454cf2450b5 100644 --- 
a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -45,7 +45,6 @@ #include #include "i915_drv.h" -#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_acpi.h" #include "intel_atomic.h" diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index ffdab22db2b0..a4f98ccef0fe 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -26,6 +26,8 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_lrc_reg.h" +#include "gt/intel_context.h" +#include "gem/i915_gem_context.h" #include "intel_guc_submission.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index b64b45d9b538..80dcd879fc58 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -29,6 +29,8 @@ #include #include +#include "gem/i915_gem_pm.h" + #include "i915_drv.h" #include "i915_reg.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c deleted file mode 100644 index 419fd4d6a8f0..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
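The include churn in the hunks above is mechanical: headers that moved under gem/ (and the GT headers under gt/) are now named by subdirectory instead of from the driver root. As a minimal illustration (the consumer file is hypothetical, the paths are the ones this series introduces), a user that previously wrote:

	#include "i915_gem_context.h"
	#include "i915_gem_pm.h"

now spells the same dependencies as:

	#include "gem/i915_gem_context.h"
	#include "gem/i915_gem_pm.h"
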
- * - */ - -#include "huge_gem_object.h" - -static void huge_free_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - unsigned long nreal = obj->scratch / PAGE_SIZE; - struct scatterlist *sg; - - for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) - __free_page(sg_page(sg)); - - sg_free_table(pages); - kfree(pages); -} - -static int huge_get_pages(struct drm_i915_gem_object *obj) -{ -#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) - const unsigned long nreal = obj->scratch / PAGE_SIZE; - const unsigned long npages = obj->base.size / PAGE_SIZE; - struct scatterlist *sg, *src, *end; - struct sg_table *pages; - unsigned long n; - - pages = kmalloc(sizeof(*pages), GFP); - if (!pages) - return -ENOMEM; - - if (sg_alloc_table(pages, npages, GFP)) { - kfree(pages); - return -ENOMEM; - } - - sg = pages->sgl; - for (n = 0; n < nreal; n++) { - struct page *page; - - page = alloc_page(GFP | __GFP_HIGHMEM); - if (!page) { - sg_mark_end(sg); - goto err; - } - - sg_set_page(sg, page, PAGE_SIZE, 0); - sg = __sg_next(sg); - } - if (nreal < npages) { - for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { - sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); - src = __sg_next(src); - if (src == end) - src = pages->sgl; - } - } - - if (i915_gem_gtt_prepare_pages(obj, pages)) - goto err; - - __i915_gem_object_set_pages(obj, pages, PAGE_SIZE); - - return 0; - -err: - huge_free_pages(obj, pages); - - return -ENOMEM; -#undef GFP -} - -static void huge_put_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - huge_free_pages(obj, pages); - - obj->mm.dirty = false; -} - -static const struct drm_i915_gem_object_ops huge_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = huge_get_pages, - .put_pages = huge_put_pages, -}; - -struct drm_i915_gem_object * -huge_gem_object(struct drm_i915_private *i915, - phys_addr_t phys_size, - dma_addr_t dma_size) -{ - struct drm_i915_gem_object *obj; - unsigned int cache_level; - - GEM_BUG_ON(!phys_size || phys_size > dma_size); - GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); - GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); - - if (overflows_type(dma_size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); - i915_gem_object_init(obj, &huge_ops); - - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->write_domain = I915_GEM_DOMAIN_CPU; - cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; - i915_gem_object_set_cache_coherency(obj, cache_level); - obj->scratch = phys_size; - - return obj; -} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h deleted file mode 100644 index a6133a9e8029..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef __HUGE_GEM_OBJECT_H -#define __HUGE_GEM_OBJECT_H - -struct drm_i915_gem_object * -huge_gem_object(struct drm_i915_private *i915, - phys_addr_t phys_size, - dma_addr_t dma_size); - -static inline phys_addr_t -huge_gem_object_phys_size(struct drm_i915_gem_object *obj) -{ - return obj->scratch; -} - -static inline dma_addr_t -huge_gem_object_dma_size(struct drm_i915_gem_object *obj) -{ - return obj->base.size; -} - -#endif /* !__HUGE_GEM_OBJECT_H */ diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c deleted file mode 100644 index b22b8249dfbd..000000000000 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ /dev/null @@ -1,1793 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
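huge_gem_object(), removed above on its way to gem/selftests/, fakes a large object cheaply: it allocates only phys_size real pages and repeats them cyclically through the scatterlist until dma_size is covered, so GTT page n aliases real page n % (phys_size >> PAGE_SHIFT). A standalone sketch of that mapping (userspace C, not driver code; sizes illustrative):

	#include <stdio.h>

	#define NREAL	4	/* real (physical) pages, illustrative */
	#define NPAGES	10	/* advertised (dma) pages, illustrative */

	int main(void)
	{
		int pages[NPAGES];

		for (int n = 0; n < NPAGES; n++)
			pages[n] = n % NREAL;	/* page n aliases real page n % NREAL */

		for (int n = 0; n < NPAGES; n++)
			printf("gtt page %d -> real page %d\n", n, pages[n]);
		return 0;
	}
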
- * - */ - -#include "../i915_selftest.h" - -#include - -#include "igt_gem_utils.h" -#include "mock_drm.h" -#include "i915_random.h" - -static const unsigned int page_sizes[] = { - I915_GTT_PAGE_SIZE_2M, - I915_GTT_PAGE_SIZE_64K, - I915_GTT_PAGE_SIZE_4K, -}; - -static unsigned int get_largest_page_size(struct drm_i915_private *i915, - u64 rem) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { - unsigned int page_size = page_sizes[i]; - - if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size) - return page_size; - } - - return 0; -} - -static void huge_pages_free_pages(struct sg_table *st) -{ - struct scatterlist *sg; - - for (sg = st->sgl; sg; sg = __sg_next(sg)) { - if (sg_page(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); - } - - sg_free_table(st); - kfree(st); -} - -static int get_huge_pages(struct drm_i915_gem_object *obj) -{ -#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) - unsigned int page_mask = obj->mm.page_mask; - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - u64 rem; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { - kfree(st); - return -ENOMEM; - } - - rem = obj->base.size; - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - - /* - * Our goal here is simple, we want to greedily fill the object from - * largest to smallest page-size, while ensuring that we use *every* - * page-size as per the given page-mask. - */ - do { - unsigned int bit = ilog2(page_mask); - unsigned int page_size = BIT(bit); - int order = get_order(page_size); - - do { - struct page *page; - - GEM_BUG_ON(order >= MAX_ORDER); - page = alloc_pages(GFP | __GFP_ZERO, order); - if (!page) - goto err; - - sg_set_page(sg, page, page_size, 0); - sg_page_sizes |= page_size; - st->nents++; - - rem -= page_size; - if (!rem) { - sg_mark_end(sg); - break; - } - - sg = __sg_next(sg); - } while ((rem - ((page_size-1) & page_mask)) >= page_size); - - page_mask &= (page_size-1); - } while (page_mask); - - if (i915_gem_gtt_prepare_pages(obj, st)) - goto err; - - obj->mm.madv = I915_MADV_DONTNEED; - - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; - -err: - sg_set_page(sg, NULL, 0, 0); - sg_mark_end(sg); - huge_pages_free_pages(st); - - return -ENOMEM; -} - -static void put_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - i915_gem_gtt_finish_pages(obj, pages); - huge_pages_free_pages(pages); - - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops huge_page_ops = { - .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | - I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = get_huge_pages, - .put_pages = put_huge_pages, -}; - -static struct drm_i915_gem_object * -huge_pages_object(struct drm_i915_private *i915, - u64 size, - unsigned int page_mask) -{ - struct drm_i915_gem_object *obj; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); - - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - i915_gem_object_init(obj, &huge_page_ops); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; - - obj->mm.page_mask = page_mask; - - return 
obj; -} - -static int fake_get_huge_pages(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - const u64 max_len = rounddown_pow_of_two(UINT_MAX); - struct sg_table *st; - struct scatterlist *sg; - unsigned int sg_page_sizes; - u64 rem; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { - kfree(st); - return -ENOMEM; - } - - /* Use optimal page sized chunks to fill in the sg table */ - rem = obj->base.size; - sg = st->sgl; - st->nents = 0; - sg_page_sizes = 0; - do { - unsigned int page_size = get_largest_page_size(i915, rem); - unsigned int len = min(page_size * div_u64(rem, page_size), - max_len); - - GEM_BUG_ON(!page_size); - - sg->offset = 0; - sg->length = len; - sg_dma_len(sg) = len; - sg_dma_address(sg) = page_size; - - sg_page_sizes |= len; - - st->nents++; - - rem -= len; - if (!rem) { - sg_mark_end(sg); - break; - } - - sg = sg_next(sg); - } while (1); - - i915_sg_trim(st); - - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg_page_sizes); - - return 0; -} - -static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct sg_table *st; - struct scatterlist *sg; - unsigned int page_size; - - st = kmalloc(sizeof(*st), GFP); - if (!st) - return -ENOMEM; - - if (sg_alloc_table(st, 1, GFP)) { - kfree(st); - return -ENOMEM; - } - - sg = st->sgl; - st->nents = 1; - - page_size = get_largest_page_size(i915, obj->base.size); - GEM_BUG_ON(!page_size); - - sg->offset = 0; - sg->length = obj->base.size; - sg_dma_len(sg) = obj->base.size; - sg_dma_address(sg) = page_size; - - obj->mm.madv = I915_MADV_DONTNEED; - - __i915_gem_object_set_pages(obj, st, sg->length); - - return 0; -#undef GFP -} - -static void fake_free_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - sg_free_table(pages); - kfree(pages); -} - -static void fake_put_huge_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) -{ - fake_free_huge_pages(obj, pages); - obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; -} - -static const struct drm_i915_gem_object_ops fake_ops = { - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = fake_get_huge_pages, - .put_pages = fake_put_huge_pages, -}; - -static const struct drm_i915_gem_object_ops fake_ops_single = { - .flags = I915_GEM_OBJECT_IS_SHRINKABLE, - .get_pages = fake_get_huge_pages_single, - .put_pages = fake_put_huge_pages, -}; - -static struct drm_i915_gem_object * -fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) -{ - struct drm_i915_gem_object *obj; - - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); - - if (size >> PAGE_SHIFT > UINT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - - obj = i915_gem_object_alloc(); - if (!obj) - return ERR_PTR(-ENOMEM); - - drm_gem_private_object_init(&i915->drm, &obj->base, size); - - if (single) - i915_gem_object_init(obj, &fake_ops_single); - else - i915_gem_object_init(obj, &fake_ops); - - obj->write_domain = I915_GEM_DOMAIN_CPU; - obj->read_domains = I915_GEM_DOMAIN_CPU; - obj->cache_level = I915_CACHE_NONE; - - return obj; -} - -static int igt_check_page_sizes(struct i915_vma *vma) -{ - struct drm_i915_private *i915 = vma->vm->i915; - unsigned int supported = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj = vma->obj; - int err = 0; - - if 
(!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { - pr_err("unsupported page_sizes.sg=%u, supported=%u\n", - vma->page_sizes.sg & ~supported, supported); - err = -EINVAL; - } - - if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) { - pr_err("unsupported page_sizes.gtt=%u, supported=%u\n", - vma->page_sizes.gtt & ~supported, supported); - err = -EINVAL; - } - - if (vma->page_sizes.phys != obj->mm.page_sizes.phys) { - pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n", - vma->page_sizes.phys, obj->mm.page_sizes.phys); - err = -EINVAL; - } - - if (vma->page_sizes.sg != obj->mm.page_sizes.sg) { - pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n", - vma->page_sizes.sg, obj->mm.page_sizes.sg); - err = -EINVAL; - } - - if (obj->mm.page_sizes.gtt) { - pr_err("obj->page_sizes.gtt(%u) should never be set\n", - obj->mm.page_sizes.gtt); - err = -EINVAL; - } - - return err; -} - -static int igt_mock_exhaust_device_supported_pages(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int i, j, single; - int err; - - /* - * Sanity check creating objects with every valid page support - * combination for our mock device. - */ - - for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { - unsigned int combination = 0; - - for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { - if (i & BIT(j)) - combination |= page_sizes[j]; - } - - mkwrite_device_info(i915)->page_sizes = combination; - - for (single = 0; single <= 1; ++single) { - obj = fake_huge_pages_object(i915, combination, !!single); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_device; - } - - if (obj->base.size != combination) { - pr_err("obj->base.size=%zu, expected=%u\n", - obj->base.size, combination); - err = -EINVAL; - goto out_put; - } - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_close; - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.sg != combination) { - pr_err("page_sizes.sg=%u, expected=%u\n", - vma->page_sizes.sg, combination); - err = -EINVAL; - } - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_put(obj); - - if (err) - goto out_device; - } - } - - goto out_device; - -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); -out_device: - mkwrite_device_info(i915)->page_sizes = saved_mask; - - return err; -} - -static int igt_mock_ppgtt_misaligned_dma(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned long supported = INTEL_INFO(i915)->page_sizes; - struct drm_i915_gem_object *obj; - int bit; - int err; - - /* - * Sanity check dma misalignment for huge pages -- the dma addresses we - * insert into the paging structures need to always respect the page - * size alignment. 
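Because each GTT page size is a distinct power-of-two bit, the combination enumeration in igt_mock_exhaust_device_supported_pages() above can treat every non-empty subset of supported sizes as an integer in [1, 2^n). A compilable sketch of that enumeration (the size values are the real I915_GTT_PAGE_SIZE_* constants; the rest is illustrative):

	#include <stdio.h>

	static const unsigned int page_sizes[] = {
		1u << 21,	/* I915_GTT_PAGE_SIZE_2M */
		1u << 16,	/* I915_GTT_PAGE_SIZE_64K */
		1u << 12,	/* I915_GTT_PAGE_SIZE_4K */
	};

	int main(void)
	{
		int n = sizeof(page_sizes) / sizeof(page_sizes[0]);

		for (unsigned int i = 1; i < (1u << n); i++) {
			unsigned int combination = 0;

			for (int j = 0; j < n; j++)
				if (i & (1u << j))
					combination |= page_sizes[j];

			printf("combination %u: %#x\n", i, combination);
		}
		return 0;
	}
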
- */ - - bit = ilog2(I915_GTT_PAGE_SIZE_64K); - - for_each_set_bit_from(bit, &supported, - ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { - IGT_TIMEOUT(end_time); - unsigned int page_size = BIT(bit); - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - unsigned int offset; - unsigned int size = - round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; - struct i915_vma *vma; - - obj = fake_huge_pages_object(i915, size, true); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - if (obj->base.size != size) { - pr_err("obj->base.size=%zu, expected=%u\n", - obj->base.size, size); - err = -EINVAL; - goto out_put; - } - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - /* Force the page size for this object */ - obj->mm.page_sizes.sg = page_size; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_unpin; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.gtt != page_size) { - pr_err("page_sizes.gtt=%u, expected %u\n", - vma->page_sizes.gtt, page_size); - err = -EINVAL; - } - - i915_vma_unpin(vma); - - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - /* - * Try all the other valid offsets until the next - * boundary -- should always fall back to using 4K - * pages. - */ - for (offset = 4096; offset < page_size; offset += 4096) { - err = i915_vma_unbind(vma); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - err = i915_vma_pin(vma, 0, 0, flags | offset); - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - - if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) { - pr_err("page_sizes.gtt=%u, expected %llu\n", - vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K); - err = -EINVAL; - } - - i915_vma_unpin(vma); - - if (err) { - i915_vma_close(vma); - goto out_unpin; - } - - if (igt_timeout(end_time, - "%s timed out at offset %x with page-size %x\n", - __func__, offset, page_size)) - break; - } - - i915_vma_close(vma); - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static void close_object_list(struct list_head *objects, - struct i915_hw_ppgtt *ppgtt) -{ - struct drm_i915_gem_object *obj, *on; - - list_for_each_entry_safe(obj, on, objects, st_link) { - struct i915_vma *vma; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (!IS_ERR(vma)) - i915_vma_close(vma); - - list_del(&obj->st_link); - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } -} - -static int igt_mock_ppgtt_huge_fill(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; - unsigned long page_num; - bool single = false; - LIST_HEAD(objects); - IGT_TIMEOUT(end_time); - int err = -ENODEV; - - for_each_prime_number_from(page_num, 1, max_pages) { - struct drm_i915_gem_object *obj; - u64 size = page_num << PAGE_SHIFT; - struct i915_vma *vma; - unsigned int expected_gtt = 0; - int i; - - obj = fake_huge_pages_object(i915, size, single); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - break; - } - - if (obj->base.size != size) { - pr_err("obj->base.size=%zd, expected=%llu\n", - obj->base.size, size); - i915_gem_object_put(obj); - err = -EINVAL; - break; - } 
- - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - break; - } - - list_add(&obj->st_link, &objects); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - break; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - break; - - err = igt_check_page_sizes(vma); - if (err) { - i915_vma_unpin(vma); - break; - } - - /* - * Figure out the expected gtt page size knowing that we go from - * largest to smallest page size sg chunks, and that we align to - * the largest page size. - */ - for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { - unsigned int page_size = page_sizes[i]; - - if (HAS_PAGE_SIZES(i915, page_size) && - size >= page_size) { - expected_gtt |= page_size; - size &= page_size-1; - } - } - - GEM_BUG_ON(!expected_gtt); - GEM_BUG_ON(size); - - if (expected_gtt & I915_GTT_PAGE_SIZE_4K) - expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; - - i915_vma_unpin(vma); - - if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { - if (!IS_ALIGNED(vma->node.start, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.start(%llx) not aligned to 2M\n", - vma->node.start); - err = -EINVAL; - break; - } - - if (!IS_ALIGNED(vma->node.size, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.size(%llx) not aligned to 2M\n", - vma->node.size); - err = -EINVAL; - break; - } - } - - if (vma->page_sizes.gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n", - vma->page_sizes.gtt, expected_gtt, - obj->base.size, yesno(!!single)); - err = -EINVAL; - break; - } - - if (igt_timeout(end_time, - "%s timed out at size %zd\n", - __func__, obj->base.size)) - break; - - single = !single; - } - - close_object_list(&objects, ppgtt); - - if (err == -ENOMEM || err == -ENOSPC) - err = 0; - - return err; -} - -static int igt_mock_ppgtt_64K(void *arg) -{ - struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->vm.i915; - struct drm_i915_gem_object *obj; - const struct object_info { - unsigned int size; - unsigned int gtt; - unsigned int offset; - } objects[] = { - /* Cases with forced padding/alignment */ - { - .size = SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_64K + SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_64K - SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_2M - SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M + SZ_4K, - .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K, - .offset = 0, - }, - { - .size = SZ_2M + SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - { - .size = SZ_2M - SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_64K, - .offset = 0, - }, - /* Try without any forced padding/alignment */ - { - .size = SZ_64K, - .offset = SZ_2M, - .gtt = I915_GTT_PAGE_SIZE_4K, - }, - { - .size = SZ_128K, - .offset = SZ_2M - SZ_64K, - .gtt = I915_GTT_PAGE_SIZE_4K, - }, - }; - struct i915_vma *vma; - int i, single; - int err; - - /* - * Sanity check some of the trickiness with 64K pages -- either we can - * safely mark the whole page-table(2M block) as 64K, or we have to - * always fallback to 4K. 
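The expected_gtt computation in igt_mock_ppgtt_huge_fill() above reduces to a greedy base conversion: consume the object size largest page first, then apply the hardware rule that 4K and 64K PTEs cannot mix within one 2M page-table block. A standalone sketch (constants real, helper illustrative):

	#include <stdio.h>

	#define SZ_4K	(1u << 12)
	#define SZ_64K	(1u << 16)
	#define SZ_2M	(1u << 21)

	/* Greedy, largest page first, mirroring igt_mock_ppgtt_huge_fill(). */
	static unsigned int expected_gtt(unsigned long long size,
					 unsigned int supported)
	{
		static const unsigned int sizes[] = { SZ_2M, SZ_64K, SZ_4K };
		unsigned int mask = 0;

		for (int i = 0; i < 3; i++) {
			if ((supported & sizes[i]) && size >= sizes[i]) {
				mask |= sizes[i];
				size &= sizes[i] - 1;	/* keep the remainder */
			}
		}

		/* 64K and 4K PTEs cannot share a 2M page-table block. */
		if (mask & SZ_4K)
			mask &= ~SZ_64K;

		return mask;
	}

	int main(void)
	{
		unsigned int all = SZ_2M | SZ_64K | SZ_4K;

		printf("%#x\n", expected_gtt(SZ_2M + SZ_64K, all)); /* 2M | 64K */
		printf("%#x\n", expected_gtt(SZ_2M + SZ_4K, all));  /* 2M | 4K  */
		return 0;
	}
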
- */ - - if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K)) - return 0; - - for (i = 0; i < ARRAY_SIZE(objects); ++i) { - unsigned int size = objects[i].size; - unsigned int expected_gtt = objects[i].gtt; - unsigned int offset = objects[i].offset; - unsigned int flags = PIN_USER; - - for (single = 0; single <= 1; single++) { - obj = fake_huge_pages_object(i915, size, !!single); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_object_put; - - /* - * Disable 2M pages -- We only want to use 64K/4K pages - * for this test. - */ - obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_object_unpin; - } - - if (offset) - flags |= PIN_OFFSET_FIXED | offset; - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_vma_close; - - err = igt_check_page_sizes(vma); - if (err) - goto out_vma_unpin; - - if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { - if (!IS_ALIGNED(vma->node.start, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.start(%llx) not aligned to 2M\n", - vma->node.start); - err = -EINVAL; - goto out_vma_unpin; - } - - if (!IS_ALIGNED(vma->node.size, - I915_GTT_PAGE_SIZE_2M)) { - pr_err("node.size(%llx) not aligned to 2M\n", - vma->node.size); - err = -EINVAL; - goto out_vma_unpin; - } - } - - if (vma->page_sizes.gtt != expected_gtt) { - pr_err("gtt=%u, expected=%u, i=%d, single=%s\n", - vma->page_sizes.gtt, expected_gtt, i, - yesno(!!single)); - err = -EINVAL; - goto out_vma_unpin; - } - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - } - - return 0; - -out_vma_unpin: - i915_vma_unpin(vma); -out_vma_close: - i915_vma_close(vma); -out_object_unpin: - i915_gem_object_unpin_pages(obj); -out_object_put: - i915_gem_object_put(obj); - - return err; -} - -static struct i915_vma * -gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) -{ - struct drm_i915_private *i915 = vma->vm->i915; - const int gen = INTEL_GEN(i915); - unsigned int count = vma->size >> PAGE_SHIFT; - struct drm_i915_gem_object *obj; - struct i915_vma *batch; - unsigned int size; - u32 *cmd; - int n; - int err; - - size = (1 + 4 * count) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = val; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? 
MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = val; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = val; - } - - offset += PAGE_SIZE; - } - - *cmd = MI_BATCH_BUFFER_END; - i915_gem_chipset_flush(i915); - - i915_gem_object_unpin_map(obj); - - batch = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err; - } - - err = i915_vma_pin(batch, 0, 0, PIN_USER); - if (err) - goto err; - - return batch; - -err: - i915_gem_object_put(obj); - - return ERR_PTR(err); -} - -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u32 dword, - u32 value) -{ - struct i915_request *rq; - struct i915_vma *batch; - int err; - - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - err = i915_gem_object_set_to_gtt_domain(vma->obj, true); - if (err) - return err; - - batch = gpu_write_dw(vma, dword * sizeof(u32), value); - if (IS_ERR(batch)) - return PTR_ERR(batch); - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto err_request; - - i915_gem_object_set_active_reference(batch->obj); - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto err_request; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); -err_request: - if (err) - i915_request_skip(rq, err); - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_close(batch); - - return err; -} - -static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) -{ - unsigned int needs_flush; - unsigned long n; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_flush); - if (err) - return err; - - for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { - u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); - - if (needs_flush & CLFLUSH_BEFORE) - drm_clflush_virt_range(ptr, PAGE_SIZE); - - if (ptr[dword] != val) { - pr_err("n=%lu ptr[%u]=%u, val=%u\n", - n, dword, ptr[dword], val); - kunmap_atomic(ptr); - err = -EINVAL; - break; - } - - kunmap_atomic(ptr); - } - - i915_gem_object_finish_access(obj); - - return err; -} - -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct drm_i915_gem_object *obj, - u64 size, u64 offset, - u32 dword, u32 val) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct i915_vma *vma; - int err; - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_unbind(vma); - if (err) - goto out_vma_close; - - err = i915_vma_pin(vma, size, 0, flags | offset); - if (err) { - /* - * The ggtt may have some pages reserved so - * refrain from erroring out. 
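The per-generation emission in gpu_write_dw() just above follows one pattern: gen8+ takes a 64-bit address, gen4-7 a 32-bit one (GGTT-relative below gen6), and older gens a virtual-address form. A standalone sketch of the branching (opcode values are placeholders; the driver uses the real MI_STORE_DWORD_IMM encodings from its headers):

	#include <stdio.h>
	#include <stdint.h>

	enum { OP_SDI_GEN8 = 8, OP_SDI_GEN4 = 4, OP_SDI_GEN2 = 2, OP_USE_GGTT = 1 };

	static uint32_t *emit_store_dw(uint32_t *cs, int gen, uint64_t offset,
				       uint32_t val)
	{
		if (gen >= 8) {
			*cs++ = OP_SDI_GEN8;
			*cs++ = (uint32_t)offset;		/* lower 32 bits */
			*cs++ = (uint32_t)(offset >> 32);	/* upper 32 bits */
			*cs++ = val;
		} else if (gen >= 4) {
			*cs++ = OP_SDI_GEN4 | (gen < 6 ? OP_USE_GGTT : 0);
			*cs++ = 0;
			*cs++ = (uint32_t)offset;
			*cs++ = val;
		} else {
			*cs++ = OP_SDI_GEN2;
			*cs++ = (uint32_t)offset;
			*cs++ = val;
		}
		return cs;
	}

	int main(void)
	{
		uint32_t batch[16], *cs = batch;

		cs = emit_store_dw(cs, 9, 0x100000, 0xdeadbeef);
		printf("emitted %td dwords\n", cs - batch);
		return 0;
	}
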
- */ - if (err == -ENOSPC && i915_is_ggtt(vm)) - err = 0; - - goto out_vma_close; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_vma_unpin; - - err = gpu_write(vma, ctx, engine, dword, val); - if (err) { - pr_err("gpu-write failed at offset=%llx\n", offset); - goto out_vma_unpin; - } - - err = cpu_check(obj, dword, val); - if (err) { - pr_err("cpu-check failed at offset=%llx\n", offset); - goto out_vma_unpin; - } - -out_vma_unpin: - i915_vma_unpin(vma); -out_vma_close: - i915_vma_destroy(vma); - - return err; -} - -static int igt_write_huge(struct i915_gem_context *ctx, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - unsigned int max_page_size; - unsigned int id; - u64 max; - u64 num; - u64 size; - int *order; - int i, n; - int err = 0; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - size = obj->base.size; - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) - size = round_up(size, I915_GTT_PAGE_SIZE_2M); - - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); - continue; - } - engines[n++] = engine; - } - - if (!n) - return 0; - - /* - * To keep things interesting when alternating between engines in our - * randomized order, lets also make feeding to the same engine a few - * times in succession a possibility by enlarging the permutation array. - */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); - if (!order) - return -ENOMEM; - - /* - * Try various offsets in an ascending/descending fashion until we - * timeout -- we want to avoid issues hidden by effectively always using - * offset = 0. - */ - i = 0; - for_each_prime_number_from(num, 0, max) { - u64 offset_low = num * max_page_size; - u64 offset_high = (max - num) * max_page_size; - u32 dword = offset_in_page(num) / 4; - - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); - - /* - * In order to utilize 64K pages we need to both pad the vma - * size and ensure the vma offset is at the start of the pt - * boundary, however to improve coverage we opt for testing both - * aligned and unaligned offsets. 
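igt_write_huge() above probes offsets from both ends of the address space, stepping the index through primes so successive placements never form a regular stride that could mask aliasing bugs. A standalone sketch of the walk (a naive primality test stands in for the kernel's <linux/prime_numbers.h> iterator; bounds illustrative):

	#include <stdio.h>

	static int is_prime(unsigned long long n)
	{
		if (n < 2)
			return 0;
		for (unsigned long long d = 2; d * d <= n; d++)
			if (n % d == 0)
				return 0;
		return 1;
	}

	int main(void)
	{
		const unsigned long long max_page_size = 1ull << 21;
		const unsigned long long max = 64;	/* vm slots, illustrative */

		for (unsigned long long num = 2; num <= max; num++) {
			if (!is_prime(num))
				continue;
			printf("low=%#llx high=%#llx\n",
			       num * max_page_size,
			       (max - num) * max_page_size);
		}
		return 0;
	}
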
- */ - if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) - offset_low = round_down(offset_low, - I915_GTT_PAGE_SIZE_2M); - - err = __igt_write_huge(ctx, engine, obj, size, offset_low, - dword, num + 1); - if (err) - break; - - err = __igt_write_huge(ctx, engine, obj, size, offset_high, - dword, num + 1); - if (err) - break; - - if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, - max_page_size)) - break; - } - - kfree(order); - - return err; -} - -static int igt_ppgtt_exhaust_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - unsigned long supported = INTEL_INFO(i915)->page_sizes; - static unsigned int pages[ARRAY_SIZE(page_sizes)]; - struct drm_i915_gem_object *obj; - unsigned int size_mask; - unsigned int page_mask; - int n, i; - int err = -ENODEV; - - if (supported == I915_GTT_PAGE_SIZE_4K) - return 0; - - /* - * Sanity check creating objects with a varying mix of page sizes -- - * ensuring that our writes lands in the right place. - */ - - n = 0; - for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) - pages[n++] = BIT(i); - - for (size_mask = 2; size_mask < BIT(n); size_mask++) { - unsigned int size = 0; - - for (i = 0; i < n; i++) { - if (size_mask & BIT(i)) - size |= pages[i]; - } - - /* - * For our page mask we want to enumerate all the page-size - * combinations which will fit into our chosen object size. - */ - for (page_mask = 2; page_mask <= size_mask; page_mask++) { - unsigned int page_sizes = 0; - - for (i = 0; i < n; i++) { - if (page_mask & BIT(i)) - page_sizes |= pages[i]; - } - - /* - * Ensure that we can actually fill the given object - * with our chosen page mask. - */ - if (!IS_ALIGNED(size, BIT(__ffs(page_sizes)))) - continue; - - obj = huge_pages_object(i915, size, page_sizes); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_device; - } - - err = i915_gem_object_pin_pages(obj); - if (err) { - i915_gem_object_put(obj); - - if (err == -ENOMEM) { - pr_info("unable to get pages, size=%u, pages=%u\n", - size, page_sizes); - err = 0; - break; - } - - pr_err("pin_pages failed, size=%u, pages=%u\n", - size_mask, page_mask); - - goto out_device; - } - - /* Force the page-size for the gtt insertion */ - obj->mm.page_sizes.sg = page_sizes; - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("exhaust write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - } - - goto out_device; - -out_unpin: - i915_gem_object_unpin_pages(obj); - i915_gem_object_put(obj); -out_device: - mkwrite_device_info(i915)->page_sizes = supported; - - return err; -} - -static int igt_ppgtt_internal_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_64K, - SZ_128K, - SZ_256K, - SZ_512K, - SZ_1M, - SZ_2M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through internal - * -- ensure that our writes land in the right place. 
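igt_ppgtt_exhaust_huge() above enumerates (object size, page mask) pairs: size_mask picks which page sizes sum into the object size, page_mask picks which sizes the allocator must use, and the alignment filter discards masks whose smallest page cannot tile the size. A compilable sketch of the enumeration:

	#include <stdio.h>

	#define SZ_4K	(1u << 12)
	#define SZ_64K	(1u << 16)
	#define SZ_2M	(1u << 21)

	int main(void)
	{
		const unsigned int pages[] = { SZ_4K, SZ_64K, SZ_2M };
		const int n = 3;

		for (unsigned int size_mask = 2; size_mask < (1u << n); size_mask++) {
			unsigned int size = 0;

			for (int i = 0; i < n; i++)
				if (size_mask & (1u << i))
					size |= pages[i];

			for (unsigned int page_mask = 2; page_mask <= size_mask;
			     page_mask++) {
				unsigned int page_sizes = 0;

				for (int i = 0; i < n; i++)
					if (page_mask & (1u << i))
						page_sizes |= pages[i];

				/* Smallest chosen page must tile the size. */
				if (size & ((page_sizes & -page_sizes) - 1))
					continue;

				printf("size=%#x pages=%#x\n", size, page_sizes);
			}
		}
		return 0;
	}
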
- */ - - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_internal(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { - pr_info("internal unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("internal write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) -{ - return i915->mm.gemfs && has_transparent_hugepage(); -} - -static int igt_ppgtt_gemfs_huge(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - static const unsigned int sizes[] = { - SZ_2M, - SZ_4M, - SZ_8M, - SZ_16M, - SZ_32M, - }; - int i; - int err; - - /* - * Sanity check that the HW uses huge pages correctly through gemfs -- - * ensure that our writes land in the right place. - */ - - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } - - for (i = 0; i < ARRAY_SIZE(sizes); ++i) { - unsigned int size = sizes[i]; - - obj = i915_gem_object_create_shmem(i915, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) - goto out_put; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n", - size); - goto out_unpin; - } - - err = igt_write_huge(ctx, obj); - if (err) { - pr_err("gemfs write-huge failed with size=%u\n", - size); - goto out_unpin; - } - - i915_gem_object_unpin_pages(obj); - __i915_gem_object_put_pages(obj, I915_MM_NORMAL); - i915_gem_object_put(obj); - } - - return 0; - -out_unpin: - i915_gem_object_unpin_pages(obj); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static int igt_ppgtt_pin_update(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *dev_priv = ctx->i915; - unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - int first, last; - int err; - - /* - * Make sure there's no funny business when doing a PIN_UPDATE -- in the - * past we had a subtle issue with being able to incorrectly do multiple - * alloc va ranges on the same object when doing a PIN_UPDATE, which - * resulted in some pretty nasty bugs, though only when using - * huge-gtt-pages. 
- */ - - if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) { - pr_info("48b PPGTT not supported, skipping\n"); - return 0; - } - - first = ilog2(I915_GTT_PAGE_SIZE_64K); - last = ilog2(I915_GTT_PAGE_SIZE_2M); - - for_each_set_bit_from(first, &supported, last + 1) { - unsigned int page_size = BIT(first); - - obj = i915_gem_object_create_internal(dev_priv, page_size); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, SZ_2M, 0, flags); - if (err) - goto out_close; - - if (vma->page_sizes.sg < page_size) { - pr_info("Unable to allocate page-size %x, finishing test early\n", - page_size); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - if (vma->page_sizes.gtt != page_size) { - dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0); - - /* - * The only valid reason for this to ever fail would be - * if the dma-mapper screwed us over when we did the - * dma_map_sg(), since it has the final say over the dma - * address. - */ - if (IS_ALIGNED(addr, page_size)) { - pr_err("page_sizes.gtt=%u, expected=%u\n", - vma->page_sizes.gtt, page_size); - err = -EINVAL; - } else { - pr_info("dma address misaligned, finishing test early\n"); - } - - goto out_unpin; - } - - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); - if (err) - goto out_unpin; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_gem_object_put(obj); - } - - obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, &ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - /* - * Make sure we don't end up with something like where the pde is still - * pointing to the 2M page, and the pt we just filled-in is dangling -- - * we can check this by writing to the first page where it would then - * land in the now stale 2M page. - */ - - err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; - - err = cpu_check(obj, 0, 0xdeadbeaf); - -out_unpin: - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); - - return err; -} - -static int igt_tmpfs_fallback(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *vaddr; - int err = 0; - - /* - * Make sure that we don't burst into a ball of flames upon falling back - * to tmpfs, which we rely on if on the off-chance we encouter a failure - * when setting up gemfs. 
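igt_tmpfs_fallback(), which follows, uses a simple fault-injection pattern: save i915->mm.gemfs, NULL it so shmem object creation must take the plain tmpfs path, exercise the path, then restore unconditionally. The shape of the pattern, reduced to standalone C (all names here hypothetical):

	#include <stdio.h>

	struct dev { void *gemfs; };

	/* Hypothetical allocation path: uses the private mount if present,
	 * otherwise falls back to plain tmpfs. */
	static int do_alloc(struct dev *d)
	{
		return d->gemfs ? 0 : 1;	/* 1 = took the fallback path */
	}

	int main(void)
	{
		struct dev d = { .gemfs = &d };	/* pretend a mount exists */
		void *saved = d.gemfs;
		int fell_back;

		d.gemfs = NULL;			/* simulate gemfs setup failure */
		fell_back = do_alloc(&d);
		d.gemfs = saved;		/* always restore */

		printf("fallback exercised: %d\n", fell_back);
		return 0;
	}
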
- */ - - i915->mm.gemfs = NULL; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_restore; - } - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto out_put; - } - *vaddr = 0xdeadbeaf; - - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_close; - - err = igt_check_page_sizes(vma); - - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); -out_restore: - i915->mm.gemfs = gemfs; - - return err; -} - -static int igt_shrink_thp(void *arg) -{ - struct i915_gem_context *ctx = arg; - struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int flags = PIN_USER; - int err; - - /* - * Sanity check shrinking huge-paged object -- make sure nothing blows - * up. - */ - - if (!igt_can_allocate_thp(i915)) { - pr_info("missing THP support, skipping\n"); - return 0; - } - - obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_put; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { - pr_info("failed to allocate THP, finishing test early\n"); - goto out_unpin; - } - - err = igt_check_page_sizes(vma); - if (err) - goto out_unpin; - - err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf); - if (err) - goto out_unpin; - - i915_vma_unpin(vma); - - /* - * Now that the pages are *unpinned* shrink-all should invoke - * shmem to truncate our pages. 
- */ - i915_gem_shrink_all(i915); - if (i915_gem_object_has_pages(obj)) { - pr_err("shrink-all didn't truncate the pages\n"); - err = -EINVAL; - goto out_close; - } - - if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { - pr_err("residual page-size bits left\n"); - err = -EINVAL; - goto out_close; - } - - err = i915_vma_pin(vma, 0, 0, flags); - if (err) - goto out_close; - - err = cpu_check(obj, 0, 0xdeadbeaf); - -out_unpin: - i915_vma_unpin(vma); -out_close: - i915_vma_close(vma); -out_put: - i915_gem_object_put(obj); - - return err; -} - -int i915_gem_huge_page_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_mock_exhaust_device_supported_pages), - SUBTEST(igt_mock_ppgtt_misaligned_dma), - SUBTEST(igt_mock_ppgtt_huge_fill), - SUBTEST(igt_mock_ppgtt_64K), - }; - struct drm_i915_private *dev_priv; - struct i915_hw_ppgtt *ppgtt; - int err; - - dev_priv = mock_gem_device(); - if (!dev_priv) - return -ENOMEM; - - /* Pretend to be a device which supports the 48b PPGTT */ - mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; - mkwrite_device_info(dev_priv)->ppgtt_size = 48; - - mutex_lock(&dev_priv->drm.struct_mutex); - ppgtt = i915_ppgtt_create(dev_priv); - if (IS_ERR(ppgtt)) { - err = PTR_ERR(ppgtt); - goto out_unlock; - } - - if (!i915_vm_is_4lvl(&ppgtt->vm)) { - pr_err("failed to create 48b PPGTT\n"); - err = -EINVAL; - goto out_close; - } - - /* If we were ever hit this then it's time to mock the 64K scratch */ - if (!i915_vm_has_scratch_64K(&ppgtt->vm)) { - pr_err("PPGTT missing 64K scratch page\n"); - err = -EINVAL; - goto out_close; - } - - err = i915_subtests(tests, ppgtt); - -out_close: - i915_ppgtt_put(ppgtt); - -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); - drm_dev_put(&dev_priv->drm); - - return err; -} - -int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_shrink_thp), - SUBTEST(igt_ppgtt_pin_update), - SUBTEST(igt_tmpfs_fallback), - SUBTEST(igt_ppgtt_exhaust_huge), - SUBTEST(igt_ppgtt_gemfs_huge), - SUBTEST(igt_ppgtt_internal_huge), - }; - struct drm_file *file; - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - int err; - - if (!HAS_PPGTT(dev_priv)) { - pr_info("PPGTT not supported, skipping live-selftests\n"); - return 0; - } - - if (i915_terminally_wedged(dev_priv)) - return 0; - - file = mock_file(dev_priv); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(dev_priv); - - ctx = live_context(dev_priv, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - if (ctx->ppgtt) - ctx->ppgtt->vm.scrub_64K = true; - - err = i915_subtests(tests, ctx); - -out_unlock: - intel_runtime_pm_put(dev_priv, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); - - mock_file_free(dev_priv, file); - - return err; -} diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index eee838dc0634..cc1ca4be1a00 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -4,7 +4,9 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" + +#include "i915_selftest.h" #include "igt_flush_test.h" #include "lib_sw_fence.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index c6a9bff85311..83643929416c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -6,11 +6,13 @@ #include -#include "../i915_selftest.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" -#include "igt_gem_utils.h" #include "igt_flush_test.h" -#include "mock_context.h" +#include "mock_drm.h" static int switch_to_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c deleted file mode 100644 index cb25b5fc8027..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#include - -#include "../i915_selftest.h" -#include "i915_random.h" - -static int cpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - unsigned int needs_clflush; - struct page *page; - void *map; - u32 *cpu; - int err; - - err = i915_gem_object_prepare_write(obj, &needs_clflush); - if (err) - return err; - - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); - - if (needs_clflush & CLFLUSH_BEFORE) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - *cpu = v; - - if (needs_clflush & CLFLUSH_AFTER) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - kunmap_atomic(map); - i915_gem_object_finish_access(obj); - - return 0; -} - -static int cpu_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - unsigned int needs_clflush; - struct page *page; - void *map; - u32 *cpu; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_clflush); - if (err) - return err; - - page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); - map = kmap_atomic(page); - cpu = map + offset_in_page(offset); - - if (needs_clflush & CLFLUSH_BEFORE) - drm_clflush_virt_range(cpu, sizeof(*cpu)); - - *v = *cpu; - - kunmap_atomic(map); - i915_gem_object_finish_access(obj); - - return 0; -} - -static int gtt_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - struct i915_vma *vma; - u32 __iomem *map; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - map = i915_vma_pin_iomap(vma); - i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); - - iowrite32(v, &map[offset / sizeof(*map)]); - i915_vma_unpin_iomap(vma); - - return 0; -} - -static int gtt_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - struct i915_vma *vma; - u32 __iomem *map; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - map = i915_vma_pin_iomap(vma); - i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); - - *v = ioread32(&map[offset / sizeof(*map)]); - i915_vma_unpin_iomap(vma); - - return 0; -} - -static int wc_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - u32 *map; - int err; - - err = i915_gem_object_set_to_wc_domain(obj, true); - if (err) - return err; - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) - return PTR_ERR(map); - - map[offset / sizeof(*map)] = v; - i915_gem_object_unpin_map(obj); - - return 0; -} - -static int wc_get(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 *v) -{ - u32 *map; - int err; - - err = i915_gem_object_set_to_wc_domain(obj, false); - if (err) - return err; - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) - return PTR_ERR(map); - - *v = map[offset / sizeof(*map)]; - i915_gem_object_unpin_map(obj); - - return 0; -} - -static int gpu_set(struct drm_i915_gem_object *obj, - unsigned long offset, - u32 v) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_request *rq; - struct i915_vma *vma; - u32 *cs; - int err; - - err = i915_gem_object_set_to_gtt_domain(obj, true); - if (err) - return err; - - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - rq = 
i915_request_create(i915->engine[RCS0]->kernel_context); - if (IS_ERR(rq)) { - i915_vma_unpin(vma); - return PTR_ERR(rq); - } - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - i915_request_add(rq); - i915_vma_unpin(vma); - return PTR_ERR(cs); - } - - if (INTEL_GEN(i915) >= 8) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; - *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); - *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); - *cs++ = v; - } else if (INTEL_GEN(i915) >= 4) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = 0; - *cs++ = i915_ggtt_offset(vma) + offset; - *cs++ = v; - } else { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = i915_ggtt_offset(vma) + offset; - *cs++ = v; - *cs++ = MI_NOOP; - } - intel_ring_advance(rq, cs); - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - i915_vma_unpin(vma); - - i915_request_add(rq); - - return err; -} - -static bool always_valid(struct drm_i915_private *i915) -{ - return true; -} - -static bool needs_fence_registers(struct drm_i915_private *i915) -{ - return !i915_terminally_wedged(i915); -} - -static bool needs_mi_store_dword(struct drm_i915_private *i915) -{ - if (i915_terminally_wedged(i915)) - return false; - - return intel_engine_can_store_dword(i915->engine[RCS0]); -} - -static const struct igt_coherency_mode { - const char *name; - int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); - int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); - bool (*valid)(struct drm_i915_private *i915); -} igt_coherency_mode[] = { - { "cpu", cpu_set, cpu_get, always_valid }, - { "gtt", gtt_set, gtt_get, needs_fence_registers }, - { "wc", wc_set, wc_get, always_valid }, - { "gpu", gpu_set, NULL, needs_mi_store_dword }, - { }, -}; - -static int igt_gem_coherency(void *arg) -{ - const unsigned int ncachelines = PAGE_SIZE/64; - I915_RND_STATE(prng); - struct drm_i915_private *i915 = arg; - const struct igt_coherency_mode *read, *write, *over; - struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; - unsigned long count, n; - u32 *offsets, *values; - int err = 0; - - /* We repeatedly write, overwrite and read from a sequence of - * cachelines in order to try and detect incoherency (unflushed writes - * from either the CPU or GPU). Each setter/getter uses our cache - * domain API which should prevent incoherency. 
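The igt_coherency_mode table above pairs each named access mode with a setter, getter, and validity predicate, and the test body below walks the full (overwrite, write, read) cross-product of those modes. A userspace sketch of the table-driven structure, with a plain array standing in for the GEM object:

	#include <stdio.h>

	static unsigned int store[16];

	static int cpu_set(unsigned long o, unsigned int v) { store[o] = v; return 0; }
	static int cpu_get(unsigned long o, unsigned int *v) { *v = store[o]; return 0; }
	static int always_valid(void) { return 1; }

	struct mode {
		const char *name;
		int (*set)(unsigned long, unsigned int);
		int (*get)(unsigned long, unsigned int *);
		int (*valid)(void);
	};

	static const struct mode modes[] = {
		{ "cpu", cpu_set, cpu_get, always_valid },
		{ NULL },	/* sentinel, as in the driver table */
	};

	int main(void)
	{
		const struct mode *over, *write, *read;

		for (over = modes; over->name; over++) {
			if (!over->set || !over->valid())
				continue;
			for (write = modes; write->name; write++) {
				if (!write->set || !write->valid())
					continue;
				for (read = modes; read->name; read++) {
					unsigned int found;

					if (!read->get || !read->valid())
						continue;
					over->set(0, ~0x5eededu); /* stale value first */
					write->set(0, 0x5eededu);
					read->get(0, &found);
					printf("%s/%s/%s: %s\n", over->name,
					       write->name, read->name,
					       found == 0x5eededu ? "ok" : "INCOHERENT");
				}
			}
		}
		return 0;
	}
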
- */ - - offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); - if (!offsets) - return -ENOMEM; - for (count = 0; count < ncachelines; count++) - offsets[count] = count * 64 + 4 * (count % 16); - - values = offsets + ncachelines; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - for (over = igt_coherency_mode; over->name; over++) { - if (!over->set) - continue; - - if (!over->valid(i915)) - continue; - - for (write = igt_coherency_mode; write->name; write++) { - if (!write->set) - continue; - - if (!write->valid(i915)) - continue; - - for (read = igt_coherency_mode; read->name; read++) { - if (!read->get) - continue; - - if (!read->valid(i915)) - continue; - - for_each_prime_number_from(count, 1, ncachelines) { - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto unlock; - } - - i915_random_reorder(offsets, ncachelines, &prng); - for (n = 0; n < count; n++) - values[n] = prandom_u32_state(&prng); - - for (n = 0; n < count; n++) { - err = over->set(obj, offsets[n], ~values[n]); - if (err) { - pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", - n, count, over->name, err); - goto put_object; - } - } - - for (n = 0; n < count; n++) { - err = write->set(obj, offsets[n], values[n]); - if (err) { - pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", - n, count, write->name, err); - goto put_object; - } - } - - for (n = 0; n < count; n++) { - u32 found; - - err = read->get(obj, offsets[n], &found); - if (err) { - pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", - n, count, read->name, err); - goto put_object; - } - - if (found != values[n]) { - pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", - n, count, over->name, - write->name, values[n], - read->name, found, - ~values[n], offsets[n]); - err = -EINVAL; - goto put_object; - } - } - - __i915_gem_object_release_unless_active(obj); - } - } - } - } -unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kfree(offsets); - return err; - -put_object: - __i915_gem_object_release_unless_active(obj); - goto unlock; -} - -int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_coherency), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c deleted file mode 100644 index c69c6d9a998b..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ /dev/null @@ -1,1752 +0,0 @@ -/* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include - -#include "gt/intel_reset.h" -#include "i915_selftest.h" - -#include "i915_random.h" -#include "igt_flush_test.h" -#include "igt_gem_utils.h" -#include "igt_live_test.h" -#include "igt_reset.h" -#include "igt_spinner.h" - -#include "mock_drm.h" -#include "mock_gem_device.h" -#include "huge_gem_object.h" - -#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) - -static int live_nop_switch(void *arg) -{ - const unsigned int nctx = 1024; - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_gem_context **ctx; - enum intel_engine_id id; - intel_wakeref_t wakeref; - struct igt_live_test t; - struct drm_file *file; - unsigned long n; - int err = -ENODEV; - - /* - * Create as many contexts as we can feasibly get away with - * and check we can switch between them rapidly. - * - * Serves as very simple stress test for submission and HW switching - * between contexts. - */ - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); - - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - err = -ENOMEM; - goto out_unlock; - } - - for (n = 0; n < nctx; n++) { - ctx[n] = live_context(i915, file); - if (IS_ERR(ctx[n])) { - err = PTR_ERR(ctx[n]); - goto out_unlock; - } - } - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - unsigned long end_time, prime; - ktime_t times[2] = {}; - - times[0] = ktime_get_raw(); - for (n = 0; n < nctx; n++) { - rq = igt_request_alloc(ctx[n], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; - } - i915_request_add(rq); - } - if (i915_request_wait(rq, - I915_WAIT_LOCKED, - HZ / 5) < 0) { - pr_err("Failed to populated %d contexts\n", nctx); - i915_gem_set_wedged(i915); - err = -EIO; - goto out_unlock; - } - - times[1] = ktime_get_raw(); - - pr_info("Populated %d contexts on %s in %lluns\n", - nctx, engine->name, ktime_to_ns(times[1] - times[0])); - - err = igt_live_test_begin(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - end_time = jiffies + i915_selftest.timeout_jiffies; - for_each_prime_number_from(prime, 2, 8192) { - times[1] = ktime_get_raw(); - - for (n = 0; n < prime; n++) { - rq = igt_request_alloc(ctx[n % nctx], engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_unlock; - } - - /* - * This space is left intentionally blank. - * - * We do not actually want to perform any - * action with this request, we just want - * to measure the latency in allocation - * and submission of our breadcrumbs - - * ensuring that the bare request is sufficient - * for the system to work (i.e. proper HEAD - * tracking of the rings, interrupt handling, - * etc). It also gives us the lowest bounds - * for latency. 
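Note how the figures quoted further down are derived: batch sizes step through primes (for_each_prime_number_from), so successive samples share no common factor, and each timed batch of empty requests is reduced to a mean as sketched here (helpers as used in the test; the prime == 2 pass doubles as the single-request baseline):

static u64 mean_request_ns(ktime_t elapsed, unsigned long prime)
{
        /* elapsed spans 'prime' back-to-back empty requests */
        return div64_u64(ktime_to_ns(elapsed), prime - 1);
}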
- */ - - i915_request_add(rq); - } - if (i915_request_wait(rq, - I915_WAIT_LOCKED, - HZ / 5) < 0) { - pr_err("Switching between %ld contexts timed out\n", - prime); - i915_gem_set_wedged(i915); - break; - } - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 2) - times[0] = times[1]; - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = igt_live_test_end(&t); - if (err) - goto out_unlock; - - pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", - engine->name, - ktime_to_ns(times[0]), - prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); - } - -out_unlock: - intel_runtime_pm_put(i915, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - return err; -} - -static struct i915_vma * -gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) -{ - struct drm_i915_gem_object *obj; - const int gen = INTEL_GEN(vma->vm->i915); - unsigned long n, size; - u32 *cmd; - int err; - - size = (4 * count + 1) * sizeof(u32); - size = round_up(size, PAGE_SIZE); - obj = i915_gem_object_create_internal(vma->vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); - offset += vma->node.start; - - for (n = 0; n < count; n++) { - if (gen >= 8) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = value; - } else if (gen >= 4) { - *cmd++ = MI_STORE_DWORD_IMM_GEN4 | - (gen < 6 ? MI_USE_GGTT : 0); - *cmd++ = 0; - *cmd++ = offset; - *cmd++ = value; - } else { - *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cmd++ = offset; - *cmd++ = value; - } - offset += PAGE_SIZE; - } - *cmd = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - goto err; - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static unsigned long real_page_count(struct drm_i915_gem_object *obj) -{ - return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; -} - -static unsigned long fake_page_count(struct drm_i915_gem_object *obj) -{ - return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; -} - -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - unsigned int dw) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; - struct i915_request *rq; - struct i915_vma *vma; - struct i915_vma *batch; - unsigned int flags; - int err; - - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); - if (err) - return err; - - /* Within the GTT the huge objects maps every page onto - * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 
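That aliasing is easy to model outside the driver: nreal backing pages serve a much larger fake extent, with fake page n resolving to real page n % nreal. Standalone C illustration (the constants are just examples):

#include <stdio.h>

#define NREAL 1024u                     /* backing pages */
#define NFAKE (8u * NREAL)              /* pages visible via the GTT */

int main(void)
{
        static unsigned int hits[NREAL];
        unsigned int dma_pfn;

        for (dma_pfn = 0; dma_pfn < NFAKE; dma_pfn++)
                hits[dma_pfn % NREAL]++;        /* phys_pfn = dma_pfn % nreal */

        printf("each of %u real pages backs %u fake pages\n",
               NREAL, hits[0]);
        return 0;
}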
- * We set the nth dword within the page using the nth - * mapping via the GTT - this should exercise the GTT mapping - * whilst checking that each context provides a unique view - * into the object. - */ - batch = gpu_fill_dw(vma, - (dw * real_page_count(obj)) << PAGE_SHIFT | - (dw * sizeof(u32)), - real_page_count(obj), - dw); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - flags = 0; - if (INTEL_GEN(vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(batch->obj); - i915_vma_unpin(batch); - i915_vma_close(batch); - - i915_vma_unpin(vma); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); -err_vma: - i915_vma_unpin(vma); - return err; -} - -static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) -{ - const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); - unsigned int n, m, need_flush; - int err; - - err = i915_gem_object_prepare_write(obj, &need_flush); - if (err) - return err; - - for (n = 0; n < real_page_count(obj); n++) { - u32 *map; - - map = kmap_atomic(i915_gem_object_get_page(obj, n)); - for (m = 0; m < DW_PER_PAGE; m++) - map[m] = value; - if (!has_llc) - drm_clflush_virt_range(map, PAGE_SIZE); - kunmap_atomic(map); - } - - i915_gem_object_finish_access(obj); - obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; - obj->write_domain = 0; - return 0; -} - -static noinline int cpu_check(struct drm_i915_gem_object *obj, - unsigned int idx, unsigned int max) -{ - unsigned int n, m, needs_flush; - int err; - - err = i915_gem_object_prepare_read(obj, &needs_flush); - if (err) - return err; - - for (n = 0; n < real_page_count(obj); n++) { - u32 *map; - - map = kmap_atomic(i915_gem_object_get_page(obj, n)); - if (needs_flush & CLFLUSH_BEFORE) - drm_clflush_virt_range(map, PAGE_SIZE); - - for (m = 0; m < max; m++) { - if (map[m] != m) { - pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", - __builtin_return_address(0), idx, - n, real_page_count(obj), m, max, - map[m], m); - err = -EINVAL; - goto out_unmap; - } - } - - for (; m < DW_PER_PAGE; m++) { - if (map[m] != STACK_MAGIC) { - pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", - __builtin_return_address(0), idx, n, m, - map[m], STACK_MAGIC); - err = -EINVAL; - goto out_unmap; - } - } - -out_unmap: - kunmap_atomic(map); - if (err) - break; - } - - i915_gem_object_finish_access(obj); - return err; -} - -static int file_add_object(struct drm_file *file, - struct drm_i915_gem_object *obj) -{ - int err; - - GEM_BUG_ON(obj->base.handle_count); - - /* tie the object to the drm_file for easy reaping */ - err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL); - if (err < 0) - return err; - - i915_gem_object_get(obj); - obj->base.handle_count++; - return 0; -} - -static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, - struct drm_file *file, - struct list_head *objects) -{ - struct drm_i915_gem_object *obj; - struct 
i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm; - u64 size; - int err; - - size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); - size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); - if (IS_ERR(obj)) - return obj; - - err = file_add_object(file, obj); - i915_gem_object_put(obj); - if (err) - return ERR_PTR(err); - - err = cpu_fill(obj, STACK_MAGIC); - if (err) { - pr_err("Failed to fill object with cpu, err=%d\n", - err); - return ERR_PTR(err); - } - - list_add_tail(&obj->st_link, objects); - return obj; -} - -static unsigned long max_dwords(struct drm_i915_gem_object *obj) -{ - unsigned long npages = fake_page_count(obj); - - GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); - return npages / DW_PER_PAGE; -} - -static int igt_ctx_exec(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENODEV; - - /* - * Create a few different contexts (with different mm) and write - * through each ctx/mm using the GPU making sure those writes end - * up in the expected pages of our obj. - */ - - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - for_each_engine(engine, i915, id) { - struct drm_i915_gem_object *obj = NULL; - unsigned long ncontexts, ndwords, dw; - struct igt_live_test t; - struct drm_file *file; - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (!engine->context_size) - continue; /* No logical context support in HW */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, engine->name); - if (err) - goto out_unlock; - - ncontexts = 0; - ndwords = 0; - dw = 0; - while (!time_after(jiffies, end_time)) { - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - if (!obj) { - obj = create_test_object(ctx, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } - } - - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - goto out_unlock; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - - ndwords++; - ncontexts++; - } - - pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", - ncontexts, engine->name, ndwords); - - ncontexts = dw = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - - err = cpu_check(obj, ncontexts++, rem); - if (err) - break; - - dw += rem; - } - -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - if (err) - return err; - } - - return 0; -} - -static int igt_shared_ctx_exec(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *parent; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_live_test t; - struct drm_file *file; - int err = 0; - - /* - * Create a few different contexts with the same mm and write - * through each ctx using the GPU making sure those writes end - * up in the expected pages of our obj. 
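The bookkeeping that ties this fill loop to the later cpu_check() pass is subtle enough to restate: dw indexes writes within the current object and wraps at max_dwords(obj), while ndwords counts writes overall, so on verification each object on the list holds min(ndwords - dw, max_dwords(obj)) valid dwords. A standalone model of that accounting:

#include <assert.h>

#define MAX_DWORDS 16UL /* stand-in for max_dwords(obj) */

int main(void)
{
        unsigned long ndwords = 0, dw = 0, n;

        for (n = 0; n < 100; n++) {     /* simulated fill passes */
                if (++dw == MAX_DWORDS)
                        dw = 0;         /* object full, next one starts */
                ndwords++;
        }

        /* verification pass: consume 'rem' dwords per object */
        for (dw = 0; dw < ndwords; ) {
                unsigned long rem = ndwords - dw;

                if (rem > MAX_DWORDS)
                        rem = MAX_DWORDS;
                dw += rem;
        }
        assert(dw == ndwords);          /* nothing left over */
        return 0;
}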
- */ - if (!DRIVER_CAPS(i915)->has_logical_contexts) - return 0; - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - parent = live_context(i915, file); - if (IS_ERR(parent)) { - err = PTR_ERR(parent); - goto out_unlock; - } - - if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */ - err = 0; - goto out_unlock; - } - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - for_each_engine(engine, i915, id) { - unsigned long ncontexts, ndwords, dw; - struct drm_i915_gem_object *obj = NULL; - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - - if (!intel_engine_can_store_dword(engine)) - continue; - - dw = 0; - ndwords = 0; - ncontexts = 0; - while (!time_after(jiffies, end_time)) { - struct i915_gem_context *ctx; - intel_wakeref_t wakeref; - - ctx = kernel_context(i915); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_test; - } - - __assign_ppgtt(ctx, parent->ppgtt); - - if (!obj) { - obj = create_test_object(parent, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - kernel_context_close(ctx); - goto out_test; - } - } - - err = 0; - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - kernel_context_close(ctx); - goto out_test; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - - ndwords++; - ncontexts++; - - kernel_context_close(ctx); - } - pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", - ncontexts, engine->name, ndwords); - - ncontexts = dw = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - - err = cpu_check(obj, ncontexts++, rem); - if (err) - goto out_test; - - dw += rem; - } - } -out_test: - if (igt_live_test_end(&t)) - err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj; - u32 *cmd; - int err; - - if (INTEL_GEN(vma->vm->i915) < 8) - return ERR_PTR(-EINVAL); - - obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - *cmd++ = lower_32_bits(vma->node.start); - *cmd++ = upper_32_bits(vma->node.start); - *cmd = MI_BATCH_BUFFER_END; - - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, vma->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err; - - return vma; - -err: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - -static int -emit_rpcs_query(struct drm_i915_gem_object *obj, - struct intel_context *ce, - struct i915_request **rq_out) -{ - struct i915_request *rq; - struct i915_vma *batch; - struct i915_vma *vma; - int err; - - GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - - vma = i915_vma_instance(obj, &ce->gem_context->ppgtt->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_set_to_gtt_domain(obj, false); - if (err) - return err; - - err 
= i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - - batch = rpcs_query_batch(vma); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto err_vma; - } - - rq = i915_request_create(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_batch; - } - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(batch, rq, 0); - if (err) - goto skip_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(batch->obj); - i915_vma_unpin(batch); - i915_vma_close(batch); - - i915_vma_unpin(vma); - - *rq_out = i915_request_get(rq); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_batch: - i915_vma_unpin(batch); -err_vma: - i915_vma_unpin(vma); - - return err; -} - -#define TEST_IDLE BIT(0) -#define TEST_BUSY BIT(1) -#define TEST_RESET BIT(2) - -static int -__sseu_prepare(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct igt_spinner **spin) -{ - struct i915_request *rq; - int ret; - - *spin = NULL; - if (!(flags & (TEST_BUSY | TEST_RESET))) - return 0; - - *spin = kzalloc(sizeof(**spin), GFP_KERNEL); - if (!*spin) - return -ENOMEM; - - ret = igt_spinner_init(*spin, i915); - if (ret) - goto err_free; - - rq = igt_spinner_create_request(*spin, - ce->gem_context, - ce->engine, - MI_NOOP); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto err_fini; - } - - i915_request_add(rq); - - if (!igt_wait_for_spinner(*spin, rq)) { - pr_err("%s: Spinner failed to start!\n", name); - ret = -ETIMEDOUT; - goto err_end; - } - - return 0; - -err_end: - igt_spinner_end(*spin); -err_fini: - igt_spinner_fini(*spin); -err_free: - kfree(fetch_and_zero(spin)); - return ret; -} - -static int -__read_slice_count(struct drm_i915_private *i915, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - struct igt_spinner *spin, - u32 *rpcs) -{ - struct i915_request *rq = NULL; - u32 s_mask, s_shift; - unsigned int cnt; - u32 *buf, val; - long ret; - - ret = emit_rpcs_query(obj, ce, &rq); - if (ret) - return ret; - - if (spin) - igt_spinner_end(spin); - - ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); - if (ret < 0) - return ret; - - buf = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(buf)) { - ret = PTR_ERR(buf); - return ret; - } - - if (INTEL_GEN(i915) >= 11) { - s_mask = GEN11_RPCS_S_CNT_MASK; - s_shift = GEN11_RPCS_S_CNT_SHIFT; - } else { - s_mask = GEN8_RPCS_S_CNT_MASK; - s_shift = GEN8_RPCS_S_CNT_SHIFT; - } - - val = *buf; - cnt = (val & s_mask) >> s_shift; - *rpcs = val; - - i915_gem_object_unpin_map(obj); - - return cnt; -} - -static int -__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, - const char *prefix, const char *suffix) -{ - if (slices == expected) - return 0; - - if (slices < 0) { - pr_err("%s: %s read slice count failed with %d%s\n", - name, prefix, slices, suffix); - return slices; - } - - pr_err("%s: %s slice count %d is not %u%s\n", - name, prefix, slices, expected, suffix); - - pr_info("RPCS=0x%x; %u%sx%u%s\n", - rpcs, slices, - (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", - (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, - (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? 
"*" : ""); - - return -EINVAL; -} - -static int -__sseu_finish(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - unsigned int expected, - struct igt_spinner *spin) -{ - unsigned int slices = hweight32(ce->engine->sseu.slice_mask); - u32 rpcs = 0; - int ret = 0; - - if (flags & TEST_RESET) { - ret = i915_reset_engine(ce->engine, "sseu"); - if (ret) - goto out; - } - - ret = __read_slice_count(i915, ce, obj, - flags & TEST_RESET ? NULL : spin, &rpcs); - ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); - if (ret) - goto out; - - ret = __read_slice_count(i915, ce->engine->kernel_context, obj, - NULL, &rpcs); - ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); - -out: - if (spin) - igt_spinner_end(spin); - - if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (ret) - return ret; - - ret = __read_slice_count(i915, ce, obj, NULL, &rpcs); - ret = __check_rpcs(name, rpcs, ret, expected, - "Context", " after idle!"); - } - - return ret; -} - -static int -__sseu_test(struct drm_i915_private *i915, - const char *name, - unsigned int flags, - struct intel_context *ce, - struct drm_i915_gem_object *obj, - struct intel_sseu sseu) -{ - struct igt_spinner *spin = NULL; - int ret; - - ret = __sseu_prepare(i915, name, flags, ce, &spin); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - if (ret) - goto out_spin; - - ret = __sseu_finish(i915, name, flags, ce, obj, - hweight32(sseu.slice_mask), spin); - -out_spin: - if (spin) { - igt_spinner_end(spin); - igt_spinner_fini(spin); - kfree(spin); - } - return ret; -} - -static int -__igt_ctx_sseu(struct drm_i915_private *i915, - const char *name, - unsigned int flags) -{ - struct intel_engine_cs *engine = i915->engine[RCS0]; - struct intel_sseu default_sseu = engine->sseu; - struct drm_i915_gem_object *obj; - struct i915_gem_context *ctx; - struct intel_context *ce; - struct intel_sseu pg_sseu; - intel_wakeref_t wakeref; - struct drm_file *file; - int ret; - - if (INTEL_GEN(i915) < 9) - return 0; - - if (!RUNTIME_INFO(i915)->sseu.has_slice_pg) - return 0; - - if (hweight32(default_sseu.slice_mask) < 2) - return 0; - - /* - * Gen11 VME friendly power-gated configuration with half enabled - * sub-slices. - */ - pg_sseu = default_sseu; - pg_sseu.slice_mask = 1; - pg_sseu.subslice_mask = - ~(~0 << (hweight32(default_sseu.subslice_mask) / 2)); - - pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", - name, flags, hweight32(default_sseu.slice_mask), - hweight32(pg_sseu.slice_mask)); - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (flags & TEST_RESET) - igt_global_reset_lock(i915); - - mutex_lock(&i915->drm.struct_mutex); - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } - i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */ - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - ret = PTR_ERR(obj); - goto out_unlock; - } - - wakeref = intel_runtime_pm_get(i915); - - ce = i915_gem_context_get_engine(ctx, RCS0); - if (IS_ERR(ce)) { - ret = PTR_ERR(ce); - goto out_rpm; - } - - ret = intel_context_pin(ce); - if (ret) - goto out_context; - - /* First set the default mask. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; - - /* Then set a power-gated configuration. 
*/ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; - - /* Back to defaults. */ - ret = __sseu_test(i915, name, flags, ce, obj, default_sseu); - if (ret) - goto out_fail; - - /* One last power-gated configuration for the road. */ - ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu); - if (ret) - goto out_fail; - -out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - ret = -EIO; - - intel_context_unpin(ce); -out_context: - intel_context_put(ce); -out_rpm: - intel_runtime_pm_put(i915, wakeref); - i915_gem_object_put(obj); - -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - if (flags & TEST_RESET) - igt_global_reset_unlock(i915); - - mock_file_free(i915, file); - - if (ret) - pr_err("%s: Failed with %d!\n", name, ret); - - return ret; -} - -static int igt_ctx_sseu(void *arg) -{ - struct { - const char *name; - unsigned int flags; - } *phase, phases[] = { - { .name = "basic", .flags = 0 }, - { .name = "idle", .flags = TEST_IDLE }, - { .name = "busy", .flags = TEST_BUSY }, - { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, - { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, - { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, - }; - unsigned int i; - int ret = 0; - - for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); - i++, phase++) - ret = __igt_ctx_sseu(arg, phase->name, phase->flags); - - return ret; -} - -static int igt_ctx_readonly(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj = NULL; - struct i915_gem_context *ctx; - struct i915_hw_ppgtt *ppgtt; - unsigned long idx, ndwords, dw; - struct igt_live_test t; - struct drm_file *file; - I915_RND_STATE(prng); - IGT_TIMEOUT(end_time); - LIST_HEAD(objects); - int err = -ENODEV; - - /* - * Create a few read-only objects (with the occasional writable object) - * and try to write into these object checking that the GPU discards - * any write to a read-only object. - */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - ctx = live_context(i915, file); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto out_unlock; - } - - ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt; - if (!ppgtt || !ppgtt->vm.has_read_only) { - err = 0; - goto out_unlock; - } - - ndwords = 0; - dw = 0; - while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; - - for_each_engine(engine, i915, id) { - intel_wakeref_t wakeref; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (!obj) { - obj = create_test_object(ctx, file, &objects); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } - - if (prandom_u32_state(&prng) & 1) - i915_gem_object_set_readonly(obj); - } - - err = 0; - with_intel_runtime_pm(i915, wakeref) - err = gpu_fill(obj, ctx, engine, dw); - if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", - ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->ppgtt), err); - goto out_unlock; - } - - if (++dw == max_dwords(obj)) { - obj = NULL; - dw = 0; - } - ndwords++; - } - } - pr_info("Submitted %lu dwords (across %u engines)\n", - ndwords, RUNTIME_INFO(i915)->num_engines); - - dw = 0; - idx = 0; - list_for_each_entry(obj, &objects, st_link) { - unsigned int rem = - min_t(unsigned int, ndwords - dw, max_dwords(obj)); - unsigned int num_writes; - - num_writes = rem; - if (i915_gem_object_is_readonly(obj)) - num_writes = 0; - - err = cpu_check(obj, idx++, num_writes); - if (err) - break; - - dw += rem; - } - -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static int check_scratch(struct i915_gem_context *ctx, u64 offset) -{ - struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->ppgtt->vm.mm, - offset, offset + sizeof(u32) - 1); - if (!node || node->start > offset) - return 0; - - GEM_BUG_ON(offset >= node->start + node->size); - - pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", - upper_32_bits(offset), lower_32_bits(offset)); - return -EINVAL; -} - -static int write_to_scratch(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, u32 value) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - struct i915_request *rq; - struct i915_vma *vma; - u32 *cmd; - int err; - - GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *cmd++ = MI_STORE_DWORD_IMM_GEN4; - if (INTEL_GEN(i915) >= 8) { - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - } else { - *cmd++ = 0; - *cmd++ = offset; - } - *cmd++ = value; - *cmd = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, 64); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); - if (err) - goto err; - - err = check_scratch(ctx, offset); - if (err) - goto err_unpin; - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(vma, rq, 0); - if (err) - goto skip_request; - - i915_gem_object_set_active_reference(obj); - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_request_add(rq); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(vma); -err: - i915_gem_object_put(obj); - return err; -} - -static int read_from_scratch(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, u32 *value) -{ - struct drm_i915_private *i915 = ctx->i915; - struct drm_i915_gem_object *obj; - const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! 
*/ - const u32 result = 0x100; - struct i915_request *rq; - struct i915_vma *vma; - u32 *cmd; - int err; - - GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - memset(cmd, POISON_INUSE, PAGE_SIZE); - if (INTEL_GEN(i915) >= 8) { - *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; - *cmd++ = lower_32_bits(offset); - *cmd++ = upper_32_bits(offset); - *cmd++ = MI_STORE_REGISTER_MEM_GEN8; - *cmd++ = RCS_GPR0; - *cmd++ = result; - *cmd++ = 0; - } else { - *cmd++ = MI_LOAD_REGISTER_MEM; - *cmd++ = RCS_GPR0; - *cmd++ = offset; - *cmd++ = MI_STORE_REGISTER_MEM; - *cmd++ = RCS_GPR0; - *cmd++ = result; - } - *cmd = MI_BATCH_BUFFER_END; - - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); - if (err) - goto err; - - err = check_scratch(ctx, offset); - if (err) - goto err_unpin; - - rq = igt_request_alloc(ctx, engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_unpin; - } - - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - - err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); - if (err) - goto skip_request; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - i915_request_add(rq); - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err; - - cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(cmd)) { - err = PTR_ERR(cmd); - goto err; - } - - *value = cmd[result / sizeof(*cmd)]; - i915_gem_object_unpin_map(obj); - i915_gem_object_put(obj); - - return 0; - -skip_request: - i915_request_skip(rq, err); -err_request: - i915_request_add(rq); -err_unpin: - i915_vma_unpin(vma); -err: - i915_gem_object_put(obj); - return err; -} - -static int igt_vm_isolation(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx_a, *ctx_b; - struct intel_engine_cs *engine; - intel_wakeref_t wakeref; - struct igt_live_test t; - struct drm_file *file; - I915_RND_STATE(prng); - unsigned long count; - unsigned int id; - u64 vm_total; - int err; - - if (INTEL_GEN(i915) < 7) - return 0; - - /* - * The simple goal here is that a write into one context is not - * observed in a second (separate page tables and scratch). 
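The probe offsets in the loop below are drawn as modelled here: a random 64-bit value reduced into the vm range (which already has one page subtracted), aligned down to a dword, then biased past page 0 so the scratch page is never touched. Plain C sketch with an illustrative page size:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define GTT_PAGE 4096ull        /* stand-in for I915_GTT_PAGE_SIZE */

int main(void)
{
        uint64_t vm_total = (1ull << 32) - GTT_PAGE;    /* example vm size */
        int i;

        srand(0);       /* deterministic, like I915_RND_STATE */
        for (i = 0; i < 4; i++) {
                uint64_t offset = (((uint64_t)rand() << 31) ^ rand()) % vm_total;

                offset &= -(uint64_t)sizeof(uint32_t);  /* dword align */
                offset += GTT_PAGE;                     /* skip page 0 */
                printf("scratch offset 0x%010llx\n",
                       (unsigned long long)offset);
        }
        return 0;
}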
- */ - - file = mock_file(i915); - if (IS_ERR(file)) - return PTR_ERR(file); - - mutex_lock(&i915->drm.struct_mutex); - - err = igt_live_test_begin(&t, i915, __func__, ""); - if (err) - goto out_unlock; - - ctx_a = live_context(i915, file); - if (IS_ERR(ctx_a)) { - err = PTR_ERR(ctx_a); - goto out_unlock; - } - - ctx_b = live_context(i915, file); - if (IS_ERR(ctx_b)) { - err = PTR_ERR(ctx_b); - goto out_unlock; - } - - /* We can only test vm isolation, if the vm are distinct */ - if (ctx_a->ppgtt == ctx_b->ppgtt) - goto out_unlock; - - vm_total = ctx_a->ppgtt->vm.total; - GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total); - vm_total -= I915_GTT_PAGE_SIZE; - - wakeref = intel_runtime_pm_get(i915); - - count = 0; - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - unsigned long this = 0; - - if (!intel_engine_can_store_dword(engine)) - continue; - - while (!__igt_timeout(end_time, NULL)) { - u32 value = 0xc5c5c5c5; - u64 offset; - - div64_u64_rem(i915_prandom_u64_state(&prng), - vm_total, &offset); - offset &= -sizeof(u32); - offset += I915_GTT_PAGE_SIZE; - - err = write_to_scratch(ctx_a, engine, - offset, 0xdeadbeef); - if (err == 0) - err = read_from_scratch(ctx_b, engine, - offset, &value); - if (err) - goto out_rpm; - - if (value) { - pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", - engine->name, value, - upper_32_bits(offset), - lower_32_bits(offset), - this); - err = -EINVAL; - goto out_rpm; - } - - this++; - } - count += this; - } - pr_info("Checked %lu scratch offsets across %d engines\n", - count, RUNTIME_INFO(i915)->num_engines); - -out_rpm: - intel_runtime_pm_put(i915, wakeref); -out_unlock: - if (igt_live_test_end(&t)) - err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - - mock_file_free(i915, file); - return err; -} - -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - -static void mock_barrier_task(void *data) -{ - unsigned int *counter = data; - - ++*counter; -} - -static int mock_context_barrier(void *arg) -{ -#undef pr_fmt -#define pr_fmt(x) "context_barrier_task():" # x - struct drm_i915_private *i915 = arg; - struct i915_gem_context *ctx; - struct i915_request *rq; - unsigned int counter; - int err; - - /* - * The context barrier provides us with a callback after it emits - * a request; useful for retiring old state after loading new. 
- */ - - mutex_lock(&i915->drm.struct_mutex); - - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } - - counter = 0; - err = context_barrier_task(ctx, 0, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately with 0 engines\n"); - err = -EINVAL; - goto out; - } - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - if (counter == 0) { - pr_err("Did not retire immediately for all unused engines\n"); - err = -EINVAL; - goto out; - } - - rq = igt_request_alloc(ctx, i915->engine[RCS0]); - if (IS_ERR(rq)) { - pr_err("Request allocation failed!\n"); - goto out; - } - i915_request_add(rq); - - counter = 0; - context_barrier_inject_fault = BIT(RCS0); - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - context_barrier_inject_fault = 0; - if (err == -ENXIO) - err = 0; - else - pr_err("Did not hit fault injection!\n"); - if (counter != 0) { - pr_err("Invoked callback on error!\n"); - err = -EIO; - } - if (err) - goto out; - - counter = 0; - err = context_barrier_task(ctx, ALL_ENGINES, - NULL, mock_barrier_task, &counter); - if (err) { - pr_err("Failed at line %d, err=%d\n", __LINE__, err); - goto out; - } - mock_device_flush(i915); - if (counter == 0) { - pr_err("Did not retire on each active engines\n"); - err = -EINVAL; - goto out; - } - -out: - mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; -#undef pr_fmt -#define pr_fmt(x) x -} - -int i915_gem_context_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_context_barrier), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_nop_switch), - SUBTEST(igt_ctx_exec), - SUBTEST(igt_ctx_readonly), - SUBTEST(igt_ctx_sseu), - SUBTEST(igt_shared_ctx_exec), - SUBTEST(igt_vm_isolation), - }; - - if (i915_terminally_wedged(dev_priv)) - return 0; - - return i915_subtests(tests, dev_priv); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c deleted file mode 100644 index cc65a503e2f0..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
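Both registration functions above follow the suite pattern used throughout these files: a static SUBTEST() table run by i915_subtests(), with mock suites building and releasing a mock device around it. A hedged sketch of wiring up a hypothetical new subtest (igt_example and the _live_selftests name are invented):

static int igt_example(void *arg)
{
        struct drm_i915_private *i915 = arg;    /* supplied by the runner */

        return i915 ? 0 : -ENODEV;      /* 0 == pass, -errno == fail */
}

int i915_example_live_selftests(struct drm_i915_private *i915)
{
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_example),
        };

        if (i915_terminally_wedged(i915))
                return 0;       /* skip on a dead GPU, as above */

        return i915_subtests(tests, i915);
}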
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" - -#include "mock_gem_device.h" -#include "mock_dmabuf.h" - -static int igt_dmabuf_export(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - return PTR_ERR(dmabuf); - } - - dma_buf_put(dmabuf); - return 0; -} - -static int igt_dmabuf_import_self(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct drm_gem_object *import; - struct dma_buf *dmabuf; - int err; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - err = PTR_ERR(dmabuf); - goto out; - } - - import = i915_gem_prime_import(&i915->drm, dmabuf); - if (IS_ERR(import)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(import)); - err = PTR_ERR(import); - goto out_dmabuf; - } - - if (import != &obj->base) { - pr_err("i915_gem_prime_import created a new object!\n"); - err = -EINVAL; - goto out_import; - } - - err = 0; -out_import: - i915_gem_object_put(to_intel_bo(import)); -out_dmabuf: - dma_buf_put(dmabuf); -out: - i915_gem_object_put(obj); - return err; -} - -static int igt_dmabuf_import(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *obj_map, *dma_map; - u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; - int err, i; - - dmabuf = mock_dmabuf(1); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - - obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); - if (IS_ERR(obj)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(obj)); - err = PTR_ERR(obj); - goto out_dmabuf; - } - - if (obj->base.dev != &i915->drm) { - pr_err("i915_gem_prime_import created a non-i915 object!\n"); - err = -EINVAL; - goto out_obj; - } - - if (obj->base.size != PAGE_SIZE) { - pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", - (long long)obj->base.size, PAGE_SIZE); - err = -EINVAL; - goto out_obj; - } - - dma_map = dma_buf_vmap(dmabuf); - if (!dma_map) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto out_obj; - } - - if (0) { /* Can not yet map dmabuf */ - obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(obj_map)) { - err = PTR_ERR(obj_map); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto out_dma_map; - } - - for (i = 0; i < ARRAY_SIZE(pattern); i++) { - memset(dma_map, pattern[i], PAGE_SIZE); - if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { - err = -EINVAL; - pr_err("imported vmap not all set to %x!\n", pattern[i]); - 
i915_gem_object_unpin_map(obj); - goto out_dma_map; - } - } - - for (i = 0; i < ARRAY_SIZE(pattern); i++) { - memset(obj_map, pattern[i], PAGE_SIZE); - if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { - err = -EINVAL; - pr_err("exported vmap not all set to %x!\n", pattern[i]); - i915_gem_object_unpin_map(obj); - goto out_dma_map; - } - } - - i915_gem_object_unpin_map(obj); - } - - err = 0; -out_dma_map: - dma_buf_vunmap(dmabuf, dma_map); -out_obj: - i915_gem_object_put(obj); -out_dmabuf: - dma_buf_put(dmabuf); - return err; -} - -static int igt_dmabuf_import_ownership(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - dmabuf = mock_dmabuf(1); - if (IS_ERR(dmabuf)) - return PTR_ERR(dmabuf); - - ptr = dma_buf_vmap(dmabuf); - if (!ptr) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto err_dmabuf; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_vunmap(dmabuf, ptr); - - obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); - if (IS_ERR(obj)) { - pr_err("i915_gem_prime_import failed with err=%d\n", - (int)PTR_ERR(obj)); - err = PTR_ERR(obj); - goto err_dmabuf; - } - - dma_buf_put(dmabuf); - - err = i915_gem_object_pin_pages(obj); - if (err) { - pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); - goto out_obj; - } - - err = 0; - i915_gem_object_unpin_pages(obj); -out_obj: - i915_gem_object_put(obj); - return err; - -err_dmabuf: - dma_buf_put(dmabuf); - return err; -} - -static int igt_dmabuf_export_vmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - if (IS_ERR(dmabuf)) { - pr_err("i915_gem_prime_export failed with err=%d\n", - (int)PTR_ERR(dmabuf)); - err = PTR_ERR(dmabuf); - goto err_obj; - } - i915_gem_object_put(obj); - - ptr = dma_buf_vmap(dmabuf); - if (!ptr) { - pr_err("dma_buf_vmap failed\n"); - err = -ENOMEM; - goto out; - } - - if (memchr_inv(ptr, 0, dmabuf->size)) { - pr_err("Exported object not initialiased to zero!\n"); - err = -EINVAL; - goto out; - } - - memset(ptr, 0xc5, dmabuf->size); - - err = 0; - dma_buf_vunmap(dmabuf, ptr); -out: - dma_buf_put(dmabuf); - return err; - -err_obj: - i915_gem_object_put(obj); - return err; -} - -static int igt_dmabuf_export_kmap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - struct dma_buf *dmabuf; - void *ptr; - int err; - - obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); - i915_gem_object_put(obj); - if (IS_ERR(dmabuf)) { - err = PTR_ERR(dmabuf); - pr_err("i915_gem_prime_export failed with err=%d\n", err); - return err; - } - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] not initialiased to zero!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - pr_err("i915_gem_object_pin_map failed with err=%d\n", err); - goto err; - } - memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE); - i915_gem_object_flush_map(obj); - 
i915_gem_object_unpin_map(obj); - - ptr = dma_buf_kmap(dmabuf, 1); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - - if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 1, ptr); - pr_err("Exported page[1] not set to 0xaa!\n"); - err = -EINVAL; - goto err; - } - - memset(ptr, 0xc5, PAGE_SIZE); - dma_buf_kunmap(dmabuf, 1, ptr); - - ptr = dma_buf_kmap(dmabuf, 0); - if (!ptr) { - pr_err("dma_buf_kmap failed\n"); - err = -ENOMEM; - goto err; - } - if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) { - dma_buf_kunmap(dmabuf, 0, ptr); - pr_err("Exported page[0] did not retain 0xc5!\n"); - err = -EINVAL; - goto err; - } - dma_buf_kunmap(dmabuf, 0, ptr); - - ptr = dma_buf_kmap(dmabuf, 2); - if (ptr) { - pr_err("Erroneously kmapped beyond the end of the object!\n"); - dma_buf_kunmap(dmabuf, 2, ptr); - err = -EINVAL; - goto err; - } - - ptr = dma_buf_kmap(dmabuf, -1); - if (ptr) { - pr_err("Erroneously kmapped before the start of the object!\n"); - dma_buf_kunmap(dmabuf, -1, ptr); - err = -EINVAL; - goto err; - } - - err = 0; -err: - dma_buf_put(dmabuf); - return err; -} - -int i915_gem_dmabuf_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_dmabuf_export), - SUBTEST(igt_dmabuf_import_self), - SUBTEST(igt_dmabuf_import), - SUBTEST(igt_dmabuf_import_ownership), - SUBTEST(igt_dmabuf_export_vmap), - SUBTEST(igt_dmabuf_export_kmap), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_dmabuf_export), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 4fc6e5445dd1..1d8235303edf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -22,11 +22,13 @@ * */ -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" -#include "igt_gem_utils.h" #include "lib_sw_fence.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 9cca66e4420a..f1e95eaf6923 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -25,10 +25,11 @@ #include #include -#include "../i915_selftest.h" +#include "gem/selftests/mock_context.h" + #include "i915_random.h" +#include "i915_selftest.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c deleted file mode 100644 index a3dd2f1be95b..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit 
persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "../i915_selftest.h" - -#include "igt_flush_test.h" -#include "mock_gem_device.h" -#include "huge_gem_object.h" - -static int igt_gem_object(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - int err = -ENOMEM; - - /* Basic test to ensure we can create an object */ - - obj = i915_gem_object_create_shmem(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - pr_err("i915_gem_object_create failed, err=%d\n", err); - goto out; - } - - err = 0; - i915_gem_object_put(obj); -out: - return err; -} - -static int igt_gem_huge(void *arg) -{ - const unsigned int nreal = 509; /* just to be awkward */ - struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; - unsigned int n; - int err; - - /* Basic sanitycheck of our huge fake object allocation */ - - obj = huge_gem_object(i915, - nreal * PAGE_SIZE, - i915->ggtt.vm.total + PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - err = i915_gem_object_pin_pages(obj); - if (err) { - pr_err("Failed to allocate %u pages (%lu total), err=%d\n", - nreal, obj->base.size / PAGE_SIZE, err); - goto out; - } - - for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { - if (i915_gem_object_get_page(obj, n) != - i915_gem_object_get_page(obj, n % nreal)) { - pr_err("Page lookup mismatch at index %u [%u]\n", - n, n % nreal); - err = -EINVAL; - goto out_unpin; - } - } - -out_unpin: - i915_gem_object_unpin_pages(obj); -out: - i915_gem_object_put(obj); - return err; -} - -int i915_gem_object_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_object), - }; - struct drm_i915_private *i915; - int err; - - i915 = mock_gem_device(); - if (!i915) - return -ENOMEM; - - err = i915_subtests(tests, i915); - - drm_dev_put(&i915->drm); - return err; -} - -int i915_gem_object_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gem_huge), - }; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index b60591531e4a..4fd5356c6577 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -24,12 +24,14 @@ #include -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" +#include "gem/selftests/mock_context.h" + #include "i915_random.h" +#include "i915_selftest.h" #include "igt_live_test.h" #include "lib_sw_fence.h" -#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index ff9ebe50fae8..acb2cc5136b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ 
b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -6,8 +6,10 @@ #include -#include "../i915_selftest.h" +#include "gem/i915_gem_pm.h" + #include "i915_random.h" +#include "i915_selftest.h" #include "igt_flush_test.h" #include "mock_gem_device.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 0027c1fac336..425b76133850 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,10 +24,11 @@ #include -#include "../i915_selftest.h" +#include "gem/selftests/mock_context.h" + +#include "i915_selftest.h" #include "mock_gem_device.h" -#include "mock_context.h" #include "mock_gtt.h" static bool assert_vma(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index e42f3c58536a..5bfd1b2626a2 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,9 +4,11 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "gem/i915_gem_context.h" + +#include "i915_drv.h" +#include "i915_selftest.h" -#include "../i915_selftest.h" #include "igt_flush_test.h" int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/selftests/igt_gem_utils.c deleted file mode 100644 index 16891b1a3e50..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_gem_utils.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#include "igt_gem_utils.h" - -#include "gt/intel_context.h" - -#include "../i915_gem_context.h" -#include "../i915_gem_pm.h" -#include "../i915_request.h" - -struct i915_request * -igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) -{ - struct intel_context *ce; - struct i915_request *rq; - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. 
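For reference, the usual call site for this helper, exactly as the context tests earlier in this patch use it:

struct i915_request *rq;

rq = igt_request_alloc(ctx, engine);
if (IS_ERR(rq))
        return PTR_ERR(rq);

/* ... emit commands, move vmas to active ... */

i915_request_add(rq);   /* submit */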
- */ - ce = i915_gem_context_get_engine(ctx, engine->id); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - rq = intel_context_create_request(ce); - intel_context_put(ce); - - return rq; -} diff --git a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/selftests/igt_gem_utils.h deleted file mode 100644 index 0f17251cf75d..000000000000 --- a/drivers/gpu/drm/i915/selftests/igt_gem_utils.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2018 Intel Corporation - */ - -#ifndef __IGT_GEM_UTILS_H__ -#define __IGT_GEM_UTILS_H__ - -struct i915_request; -struct i915_gem_context; -struct intel_engine_cs; - -struct i915_request * -igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); - -#endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index ece8a8a0d3b0..38d6f1b10c54 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "igt_gem_utils.h" +#include "gem/selftests/igt_gem_utils.h" + #include "igt_spinner.h" int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h index d312e7cdab68..34a88ac9b47a 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.h +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h @@ -7,13 +7,12 @@ #ifndef __I915_SELFTESTS_IGT_SPINNER_H__ #define __I915_SELFTESTS_IGT_SPINNER_H__ -#include "../i915_selftest.h" - +#include "gem/i915_gem_context.h" #include "gt/intel_engine.h" -#include "../i915_drv.h" -#include "../i915_request.h" -#include "../i915_gem_context.h" +#include "i915_drv.h" +#include "i915_request.h" +#include "i915_selftest.h" struct igt_spinner { struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index b05a21eaa8f4..7fd0321e0947 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -22,7 +22,8 @@ * */ -#include "../i915_selftest.h" +#include "i915_selftest.h" +#include "gem/i915_gem_pm.h" /* max doorbell number + negative test for each client type */ #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM) diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c deleted file mode 100644 index 10e67c931ed1..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
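The spinner helper whose includes are touched above follows a fixed lifecycle, visible in __sseu_prepare() earlier in this patch; a hedged sketch, with error handling abbreviated:

struct igt_spinner spin;
struct i915_request *rq;
int err;

err = igt_spinner_init(&spin, i915);
if (err)
        return err;

rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
if (IS_ERR(rq)) {
        igt_spinner_fini(&spin);
        return PTR_ERR(rq);
}
i915_request_add(rq);

if (!igt_wait_for_spinner(&spin, rq)) {
        igt_spinner_end(&spin);
        igt_spinner_fini(&spin);
        return -ETIMEDOUT;      /* spinner never started */
}

/* ... engine stays busy until ... */
igt_spinner_end(&spin);
igt_spinner_fini(&spin);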
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "mock_context.h"
-#include "mock_gtt.h"
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name)
-{
-	struct i915_gem_context *ctx;
-	struct i915_gem_engines *e;
-	int ret;
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return NULL;
-
-	kref_init(&ctx->ref);
-	INIT_LIST_HEAD(&ctx->link);
-	ctx->i915 = i915;
-
-	mutex_init(&ctx->engines_mutex);
-	e = default_engines(ctx);
-	if (IS_ERR(e))
-		goto err_free;
-	RCU_INIT_POINTER(ctx->engines, e);
-
-	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
-	INIT_LIST_HEAD(&ctx->hw_id_link);
-	mutex_init(&ctx->mutex);
-
-	ret = i915_gem_context_pin_hw_id(ctx);
-	if (ret < 0)
-		goto err_engines;
-
-	if (name) {
-		struct i915_hw_ppgtt *ppgtt;
-
-		ctx->name = kstrdup(name, GFP_KERNEL);
-		if (!ctx->name)
-			goto err_put;
-
-		ppgtt = mock_ppgtt(i915, name);
-		if (!ppgtt)
-			goto err_put;
-
-		__set_ppgtt(ctx, ppgtt);
-	}
-
-	return ctx;
-
-err_engines:
-	free_engines(rcu_access_pointer(ctx->engines));
-err_free:
-	kfree(ctx);
-	return NULL;
-
-err_put:
-	i915_gem_context_set_closed(ctx);
-	i915_gem_context_put(ctx);
-	return NULL;
-}
-
-void mock_context_close(struct i915_gem_context *ctx)
-{
-	context_close(ctx);
-}
-
-void mock_init_contexts(struct drm_i915_private *i915)
-{
-	init_contexts(i915);
-}
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file)
-{
-	struct i915_gem_context *ctx;
-	int err;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	ctx = i915_gem_create_context(i915, 0);
-	if (IS_ERR(ctx))
-		return ctx;
-
-	err = gem_context_register(ctx, file->driver_priv);
-	if (err < 0)
-		goto err_ctx;
-
-	return ctx;
-
-err_ctx:
-	context_close(ctx);
-	return ERR_PTR(err);
-}
-
-struct i915_gem_context *
-kernel_context(struct drm_i915_private *i915)
-{
-	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
-}
-
-void kernel_context_close(struct i915_gem_context *ctx)
-{
-	context_close(ctx);
-}
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
deleted file mode 100644
index 29b9d60a158b..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_CONTEXT_H
-#define __MOCK_CONTEXT_H
-
-void mock_init_contexts(struct drm_i915_private *i915);
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name);
-
-void mock_context_close(struct i915_gem_context *ctx);
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
-
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
-void kernel_context_close(struct i915_gem_context *ctx);
-
-#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
deleted file mode 100644
index ca682caf1062..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "mock_dmabuf.h"
-
-static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
-					 enum dma_data_direction dir)
-{
-	struct mock_dmabuf *mock = to_mock(attachment->dmabuf);
-	struct sg_table *st;
-	struct scatterlist *sg;
-	int i, err;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		return ERR_PTR(-ENOMEM);
-
-	err = sg_alloc_table(st, mock->npages, GFP_KERNEL);
-	if (err)
-		goto err_free;
-
-	sg = st->sgl;
-	for (i = 0; i < mock->npages; i++) {
-		sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0);
-		sg = sg_next(sg);
-	}
-
-	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
-		err = -ENOMEM;
-		goto err_st;
-	}
-
-	return st;
-
-err_st:
-	sg_free_table(st);
-err_free:
-	kfree(st);
-	return ERR_PTR(err);
-}
-
-static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment,
-			       struct sg_table *st,
-			       enum dma_data_direction dir)
-{
-	dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir);
-	sg_free_table(st);
-	kfree(st);
-}
-
-static void mock_dmabuf_release(struct dma_buf *dma_buf)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-	int i;
-
-	for (i = 0; i < mock->npages; i++)
-		put_page(mock->pages[i]);
-
-	kfree(mock);
-}
-
-static void *mock_dmabuf_vmap(struct dma_buf *dma_buf)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL);
-}
-
-static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	vm_unmap_ram(vaddr, mock->npages);
-}
-
-static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	return kmap(mock->pages[page_num]);
-}
-
-static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
-{
-	struct mock_dmabuf *mock = to_mock(dma_buf);
-
-	return kunmap(mock->pages[page_num]);
-}
-
-static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
-{
-	return -ENODEV;
-}
-
-static const struct dma_buf_ops mock_dmabuf_ops = {
-	.map_dma_buf = mock_map_dma_buf,
-	.unmap_dma_buf = mock_unmap_dma_buf,
-	.release = mock_dmabuf_release,
-	.map = mock_dmabuf_kmap,
-	.unmap = mock_dmabuf_kunmap,
-	.mmap = mock_dmabuf_mmap,
-	.vmap = mock_dmabuf_vmap,
-	.vunmap = mock_dmabuf_vunmap,
-};
-
-static struct dma_buf *mock_dmabuf(int npages)
-{
-	struct mock_dmabuf *mock;
-	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-	struct dma_buf *dmabuf;
-	int i;
-
-	mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *),
-		       GFP_KERNEL);
-	if (!mock)
-		return ERR_PTR(-ENOMEM);
-
-	mock->npages = npages;
-	for (i = 0; i < npages; i++) {
-		mock->pages[i] = alloc_page(GFP_KERNEL);
-		if (!mock->pages[i])
-			goto err;
-	}
-
-	exp_info.ops = &mock_dmabuf_ops;
-	exp_info.size = npages * PAGE_SIZE;
-	exp_info.flags = O_CLOEXEC;
-	exp_info.priv = mock;
-
-	dmabuf = dma_buf_export(&exp_info);
-	if (IS_ERR(dmabuf))
-		goto err;
-
-	return dmabuf;
-
-err:
-	while (i--)
-		put_page(mock->pages[i]);
-	kfree(mock);
-	return ERR_PTR(-ENOMEM);
-}
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
deleted file mode 100644
index ec80613159b9..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_DMABUF_H__
-#define __MOCK_DMABUF_H__
-
-#include <linux/dma-buf.h>
-
-struct mock_dmabuf {
-	int npages;
-	struct page *pages[];
-};
-
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
-{
-	return buf->priv;
-}
-
-#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 9fd02025d382..e25b74a27f83 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -27,13 +27,14 @@
 
 #include "gt/mock_engine.h"
 
-#include "mock_context.h"
 #include "mock_request.h"
 #include "mock_gem_device.h"
-#include "mock_gem_object.h"
 #include "mock_gtt.h"
 #include "mock_uncore.h"
 
+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/mock_gem_object.h"
+
 void mock_device_flush(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h
deleted file mode 100644
index 20acdbee7bd0..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __MOCK_GEM_OBJECT_H__
-#define __MOCK_GEM_OBJECT_H__
-
-struct mock_object {
-	struct drm_i915_gem_object base;
-};
-
-#endif /* !__MOCK_GEM_OBJECT_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index b99f7576153c..9390fc09984b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,9 +22,9 @@
  *
  */
 
+#include "gem/selftests/igt_gem_utils.h"
 #include "gt/mock_engine.h"
 
-#include "igt_gem_utils.h"
 #include "mock_request.h"
 
 struct i915_request *
--
cgit v1.2.3