summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/selftest_hangcheck.c')
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c173
1 files changed, 167 insertions, 6 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index fb5ebf930ab2..463bb6a700c8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -506,7 +506,8 @@ static int igt_reset_nop_engine(void *arg)
}
err = intel_engine_reset(engine, NULL);
if (err) {
- pr_err("i915_reset_engine failed\n");
+ pr_err("intel_engine_reset(%s) failed, err:%d\n",
+ engine->name, err);
break;
}
@@ -539,6 +540,149 @@ static int igt_reset_nop_engine(void *arg)
return 0;
}
+static void force_reset_timeout(struct intel_engine_cs *engine)
+{
+ engine->reset_timeout.probability = 999;
+ atomic_set(&engine->reset_timeout.times, -1);
+}
+
+static void cancel_reset_timeout(struct intel_engine_cs *engine)
+{
+ memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
+}
+
+static int igt_reset_fail_engine(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* Check that we can recover from engine-reset failues */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ unsigned int count;
+ struct intel_context *ce;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ st_engine_heartbeat_disable(engine);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+
+ force_reset_timeout(engine);
+ err = intel_engine_reset(engine, NULL);
+ cancel_reset_timeout(engine);
+ if (err == 0) /* timeouts only generated on gen8+ */
+ goto skip;
+
+ count = 0;
+ do {
+ struct i915_request *last = NULL;
+ int i;
+
+ if (!wait_for_idle(engine)) {
+ pr_err("%s failed to idle before reset\n",
+ engine->name);
+ err = -EIO;
+ break;
+ }
+
+ for (i = 0; i < count % 15; i++) {
+ struct i915_request *rq;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+ intel_engine_dump(engine, &p,
+ "%s(%s): failed to submit request\n",
+ __func__,
+ engine->name);
+
+ GEM_TRACE("%s(%s): failed to submit request\n",
+ __func__,
+ engine->name);
+ GEM_TRACE_DUMP();
+
+ intel_gt_set_wedged(gt);
+ if (last)
+ i915_request_put(last);
+
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ if (last)
+ i915_request_put(last);
+ last = i915_request_get(rq);
+ i915_request_add(rq);
+ }
+
+ if (count & 1) {
+ err = intel_engine_reset(engine, NULL);
+ if (err) {
+ GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n",
+ engine->name, err);
+ GEM_TRACE_DUMP();
+ i915_request_put(last);
+ break;
+ }
+ } else {
+ force_reset_timeout(engine);
+ err = intel_engine_reset(engine, NULL);
+ cancel_reset_timeout(engine);
+ if (err != -ETIMEDOUT) {
+ pr_err("intel_engine_reset(%s) did not fail, err:%d\n",
+ engine->name, err);
+ i915_request_put(last);
+ break;
+ }
+ }
+
+ err = 0;
+ if (last) {
+ if (i915_request_wait(last, 0, HZ / 2) < 0) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ intel_engine_dump(engine, &p,
+ "%s(%s): failed to complete request\n",
+ __func__,
+ engine->name);
+
+ GEM_TRACE("%s(%s): failed to complete request\n",
+ __func__,
+ engine->name);
+ GEM_TRACE_DUMP();
+
+ err = -EIO;
+ }
+ i915_request_put(last);
+ }
+ count++;
+ } while (err == 0 && time_before(jiffies, end_time));
+out:
+ pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+skip:
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ st_engine_heartbeat_enable(engine);
+ intel_context_put(ce);
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int __igt_reset_engine(struct intel_gt *gt, bool active)
{
struct i915_gpu_error *global = &gt->i915->gpu_error;
@@ -560,6 +704,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
+ unsigned long count;
IGT_TIMEOUT(end_time);
if (active && !intel_engine_can_store_dword(engine))
@@ -577,6 +722,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ count = 0;
do {
if (active) {
struct i915_request *rq;
@@ -608,7 +754,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
err = intel_engine_reset(engine, NULL);
if (err) {
- pr_err("i915_reset_engine failed\n");
+ pr_err("intel_engine_reset(%s) failed, err:%d\n",
+ engine->name, err);
break;
}
@@ -625,9 +772,13 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
err = -EINVAL;
break;
}
+
+ count++;
} while (time_before(jiffies, end_time));
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable(engine);
+ pr_info("%s: Completed %lu %s resets\n",
+ engine->name, count, active ? "active" : "idle");
if (err)
break;
@@ -1478,7 +1629,8 @@ static int igt_reset_queue(void *arg)
prev = rq;
count++;
} while (time_before(jiffies, end_time));
- pr_info("%s: Completed %d resets\n", engine->name, count);
+ pr_info("%s: Completed %d queued resets\n",
+ engine->name, count);
*h.batch = MI_BATCH_BUFFER_END;
intel_gt_chipset_flush(engine->gt);
@@ -1575,13 +1727,21 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
engine->name, mode, p->name);
- tasklet_disable(t);
+ if (t->func)
+ tasklet_disable(t);
+ if (strcmp(p->name, "softirq"))
+ local_bh_disable();
p->critical_section_begin();
- err = intel_engine_reset(engine, NULL);
+ err = __intel_engine_reset_bh(engine, NULL);
p->critical_section_end();
- tasklet_enable(t);
+ if (strcmp(p->name, "softirq"))
+ local_bh_enable();
+ if (t->func) {
+ tasklet_enable(t);
+ tasklet_hi_schedule(t);
+ }
if (err)
pr_err("i915_reset_engine(%s:%s) failed under %s\n",
@@ -1687,6 +1847,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_nop_engine),
SUBTEST(igt_reset_idle_engine),
SUBTEST(igt_reset_active_engine),
+ SUBTEST(igt_reset_fail_engine),
SUBTEST(igt_reset_engines),
SUBTEST(igt_reset_engines_atomic),
SUBTEST(igt_reset_queue),