From abeb4def31d9c3dbf06052e65cd884730ec8667e Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Tue, 20 Jun 2017 10:57:50 +0100 Subject: [PATCH] drm/i915/selftests: reset engine self tests Check that we can reset specific engines, also check the fallback to full reset if something didn't work. v2: rebase. v3: use RESET_ENGINE_IN_PROGRESS flag. v4: use I915_RESET_ENGINE flag. Signed-off-by: Michel Thierry Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-12-michel.thierry@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-9-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/intel_hangcheck.c | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index cc00a361f0fa..af475189bd52 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -324,6 +324,54 @@ static int igt_global_reset(void *arg) return err; } +static int igt_reset_engine(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int reset_count, reset_engine_count; + int err = 0; + + /* Check that we can issue a global GPU and engine reset */ + + if (!intel_has_reset_engine(i915)) + return 0; + + for_each_engine(engine, i915, id) { + set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + reset_count = i915_reset_count(&i915->gpu_error); + reset_engine_count = i915_reset_engine_count(&i915->gpu_error, + engine); + + err = i915_reset_engine(engine); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } + + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! (engine reset expected)\n"); + err = -EINVAL; + break; + } + + if (i915_reset_engine_count(&i915->gpu_error, engine) == + reset_engine_count) { + pr_err("No %s engine reset recorded!\n", engine->name); + err = -EINVAL; + break; + } + + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + } + + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + static u32 fake_hangcheck(struct drm_i915_gem_request *rq) { u32 reset_count; @@ -530,13 +578,113 @@ unlock: return err; } +static int igt_render_engine_reset_fallback(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS]; + struct hang h; + struct drm_i915_gem_request *rq; + unsigned int reset_count, reset_engine_count; + int err = 0; + + /* Check that we can issue a global GPU and engine reset */ + + if (!intel_has_reset_engine(i915)) + return 0; + + set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + mutex_lock(&i915->drm.struct_mutex); + + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + /* make reset engine fail */ + rq->fence.error = -EIO; + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); + reset_count = fake_hangcheck(rq); + + /* unlock since we'll call handle_error */ + mutex_unlock(&i915->drm.struct_mutex); + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); + + i915_handle_error(i915, intel_engine_flag(engine), "live test"); + + if (i915_reset_engine_count(&i915->gpu_error, engine) != + reset_engine_count) { + pr_err("render engine reset recorded! (full reset expected)\n"); + err = -EINVAL; + goto out_rq; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No full GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + + /* + * by using fence.error = -EIO, full reset sets the wedged flag, do one + * more full reset to re-enable the hw. + */ + if (i915_terminally_wedged(&i915->gpu_error)) { + set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + mutex_lock(&i915->drm.struct_mutex); + rq->fence.error = 0; + + set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags); + i915_reset(i915); + GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, + &i915->gpu_error.flags)); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No full GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + int intel_hangcheck_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_global_reset), + SUBTEST(igt_reset_engine), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), + SUBTEST(igt_render_engine_reset_fallback), }; if (!intel_has_gpu_reset(i915)) -- 2.30.2