summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c110
1 files changed, 76 insertions, 34 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 845023c14eb3..f266b68cf012 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -30,6 +30,7 @@
#include "i915_gem_context.h"
#include "i915_gem_evict.h"
#include "i915_gem_ioctls.h"
+#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
@@ -53,13 +54,13 @@ enum {
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};
-/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
-#define __EXEC_OBJECT_HAS_PIN BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE BIT(29)
-#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
+/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */
+#define __EXEC_OBJECT_HAS_PIN BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE BIT(28)
+#define __EXEC_OBJECT_USERPTR_INIT BIT(27)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(26)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(25)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
@@ -729,37 +730,74 @@ static int eb_reserve(struct i915_execbuffer *eb)
bool unpinned;
/*
- * Attempt to pin all of the buffers into the GTT.
- * This is done in 2 phases:
+ * We have one more buffers that we couldn't bind, which could be due to
+ * various reasons. To resolve this we have 4 passes, with every next
+ * level turning the screws tighter:
*
- * 1. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 2. Bind new objects.
+ * 0. Unbind all objects that do not match the GTT constraints for the
+ * execbuffer (fenceable, mappable, alignment etc). Bind all new
+ * objects. This avoids unnecessary unbinding of later objects in order
+ * to make room for the earlier objects *unless* we need to defragment.
*
- * This avoid unnecessary unbinding of later objects in order to make
- * room for the earlier objects *unless* we need to defragment.
+ * 1. Reorder the buffers, where objects with the most restrictive
+ * placement requirements go first (ignoring fixed location buffers for
+ * now). For example, objects needing the mappable aperture (the first
+ * 256M of GTT), should go first vs objects that can be placed just
+ * about anywhere. Repeat the previous pass.
*
- * Defragmenting is skipped if all objects are pinned at a fixed location.
+ * 2. Consider buffers that are pinned at a fixed location. Also try to
+ * evict the entire VM this time, leaving only objects that we were
+ * unable to lock. Try again to bind the buffers. (still using the new
+ * buffer order).
+ *
+ * 3. We likely have object lock contention for one or more stubborn
+ * objects in the VM, for which we need to evict to make forward
+ * progress (perhaps we are fighting the shrinker?). When evicting the
+ * VM this time around, anything that we can't lock we now track using
+ * the busy_bo, using the full lock (after dropping the vm->mutex to
+ * prevent deadlocks), instead of trylock. We then continue to evict the
+ * VM, this time with the stubborn object locked, which we can now
+ * hopefully unbind (if still bound in the VM). Repeat until the VM is
+ * evicted. Finally we should be able bind everything.
*/
- for (pass = 0; pass <= 2; pass++) {
+ for (pass = 0; pass <= 3; pass++) {
int pin_flags = PIN_USER | PIN_VALIDATE;
if (pass == 0)
pin_flags |= PIN_NONBLOCK;
if (pass >= 1)
- unpinned = eb_unbind(eb, pass == 2);
+ unpinned = eb_unbind(eb, pass >= 2);
if (pass == 2) {
err = mutex_lock_interruptible(&eb->context->vm->mutex);
if (!err) {
- err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
mutex_unlock(&eb->context->vm->mutex);
}
if (err)
return err;
}
+ if (pass == 3) {
+retry:
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ struct drm_i915_gem_object *busy_bo = NULL;
+
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
+ mutex_unlock(&eb->context->vm->mutex);
+ if (err && busy_bo) {
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
+ i915_gem_object_put(busy_bo);
+ if (!err)
+ goto retry;
+ }
+ }
+ if (err)
+ return err;
+ }
+
list_for_each_entry(ev, &eb->unbound, bind_link) {
err = eb_reserve_vma(eb, ev, pin_flags);
if (err)
@@ -2101,7 +2139,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->composite_fence ?
eb->composite_fence :
&eb->requests[j]->fence,
- flags | __EXEC_OBJECT_NO_RESERVE);
+ flags | __EXEC_OBJECT_NO_RESERVE |
+ __EXEC_OBJECT_NO_REQUEST_AWAIT);
}
}
@@ -2148,7 +2187,8 @@ err_skip:
return err;
}
-static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+ struct drm_i915_gem_execbuffer2 *exec)
{
if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
return -EINVAL;
@@ -2161,7 +2201,7 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
}
if (exec->DR4 == 0xffffffff) {
- DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+ drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n");
exec->DR4 = 0;
}
if (exec->DR1 || exec->DR4)
@@ -2424,7 +2464,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
- prandom_u32_max(num_vcs_engines(dev_priv));
+ get_random_u32_below(num_vcs_engines(dev_priv));
return file_priv->bsd_engine;
}
@@ -2799,7 +2839,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
- DRM_DEBUG("Invalid syncobj handle provided\n");
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
return -ENOENT;
}
@@ -2807,7 +2848,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
if (!fence && user_fence.flags &&
!(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
- DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2816,7 +2858,9 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
err = dma_fence_chain_find_seqno(&fence, point);
if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
- DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle missing requested point %llu\n",
+ point);
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return err;
@@ -2842,7 +2886,8 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
* 0) would break the timeline.
*/
if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
- DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+ drm_dbg(&eb->i915->drm,
+ "Trying to wait & signal the same timeline point.\n");
dma_fence_put(fence);
drm_syncobj_put(syncobj);
return -EINVAL;
@@ -2913,14 +2958,16 @@ static int add_fence_array(struct i915_execbuffer *eb)
syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
- DRM_DEBUG("Invalid syncobj handle provided\n");
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
return -ENOENT;
}
if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
fence = drm_syncobj_fence_get(syncobj);
if (!fence) {
- DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
drm_syncobj_put(syncobj);
return -EINVAL;
}
@@ -2954,11 +3001,6 @@ await_fence_array(struct i915_execbuffer *eb,
int err;
for (n = 0; n < eb->num_fences; n++) {
- struct drm_syncobj *syncobj;
- unsigned int flags;
-
- syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
if (!eb->fences[n].dma_fence)
continue;
@@ -3520,7 +3562,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- err = i915_gem_check_execbuffer(args);
+ err = i915_gem_check_execbuffer(i915, args);
if (err)
return err;