drm/i915/selftests: Add a safety net to live_workarounds
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 11 Jul 2018 12:29:52 +0000 (13:29 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 11 Jul 2018 13:13:56 +0000 (14:13 +0100)
Since live_workarounds pokes around the w/a registers and checks to see
if they survive across a reset, we are prone to fouling the machine and
leaving it in a non-recoverable state. Wrap the probe inside a timeout
to abort the test if the reset fails.

v2: Include GEM_TRACE on declaring wedged.
v3: Add a few includes to make the header look standalone.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107188
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180711122952.18448-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/selftests/igt_wedge_me.h [new file with mode: 0644]
drivers/gpu/drm/i915/selftests/intel_workarounds.c

diff --git a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h
new file mode 100644 (file)
index 0000000..08e5ff1
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef IGT_WEDGE_ME_H
+#define IGT_WEDGE_ME_H
+
+#include <linux/workqueue.h>
+
+#include "../i915_gem.h"
+
+struct drm_i915_private;
+
+struct igt_wedge_me { /* watchdog state for igt_wedge_on_timeout(); the caller in this patch keeps it in a local variable */
+       struct delayed_work work; /* delayed work item; __igt_wedge_me() is its callback */
+       struct drm_i915_private *i915; /* device passed to i915_gem_set_wedged(); set to NULL by __igt_fini_wedge() */
+       const char *name; /* label printed in the timeout messages; the macro supplies __func__ */
+};
+
+static void __igt_wedge_me(struct work_struct *work) /* delayed-work callback: runs only if the timeout expires before the work is cancelled */
+{
+       struct igt_wedge_me *w = container_of(work, typeof(*w), work.work); /* recover the enclosing igt_wedge_me from its embedded work item */
+
+       pr_err("%s timed out, cancelling test.\n", w->name);
+
+       GEM_TRACE("%s timed out.\n", w->name); /* added in v2 of the patch: note the timeout via GEM_TRACE as well */
+       GEM_TRACE_DUMP();
+
+       i915_gem_set_wedged(w->i915); /* declare the device wedged; per the commit message this is what aborts the test */
+}
+
+static void __igt_init_wedge(struct igt_wedge_me *w, /* arm the watchdog: record device and label, then schedule the timeout work */
+                            struct drm_i915_private *i915,
+                            long timeout, /* delay handed to schedule_delayed_work(); the caller in this patch passes HZ / 5 */
+                            const char *name)
+{
+       w->i915 = i915;
+       w->name = name;
+
+       INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me); /* on-stack variant; paired with destroy_delayed_work_on_stack() in __igt_fini_wedge() */
+       schedule_delayed_work(&w->work, timeout);
+}
+
+static void __igt_fini_wedge(struct igt_wedge_me *w) /* disarm the watchdog; used as the for-loop step of igt_wedge_on_timeout() */
+{
+       cancel_delayed_work_sync(&w->work); /* synchronous cancel, so the callback is not left running against *w */
+       destroy_delayed_work_on_stack(&w->work); /* pairs with INIT_DELAYED_WORK_ONSTACK() in __igt_init_wedge() */
+       w->i915 = NULL; /* makes the (W)->i915 loop condition in igt_wedge_on_timeout() false */
+}
+
+#define igt_wedge_on_timeout(W, DEV, TIMEOUT) /* guard the statement that follows with a timeout; it runs once for a non-NULL DEV */ \
+       for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__);         \
+            (W)->i915; /* non-NULL until __igt_fini_wedge() clears it */ \
+            __igt_fini_wedge((W))) /* NOTE(review): leaving the body via break/goto/return would skip this disarm step */
+
+#endif /* IGT_WEDGE_ME_H */
index fafdec3fe83eb0616e3cda093fc37549fc67cb24..0d39b3bf0c0d0f11aaa883d672e9ba11dc33f5b5 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "../i915_selftest.h"
 
+#include "igt_wedge_me.h"
 #include "mock_context.h"
 
 static struct drm_i915_gem_object *
@@ -111,6 +112,7 @@ static int check_whitelist(const struct whitelist *w,
                           struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_object *results;
+       struct igt_wedge_me wedge;
        u32 *vaddr;
        int err;
        int i;
@@ -119,7 +121,11 @@ static int check_whitelist(const struct whitelist *w,
        if (IS_ERR(results))
                return PTR_ERR(results);
 
-       err = i915_gem_object_set_to_cpu_domain(results, false);
+       err = 0;
+       igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
+               err = i915_gem_object_set_to_cpu_domain(results, false);
+       if (i915_terminally_wedged(&ctx->i915->gpu_error))
+               err = -EIO;
        if (err)
                goto out_put;