drm/i915/selftests: Verify we can perform resets from atomic context
author: Chris Wilson <chris@chris-wilson.co.uk>
Thu, 13 Dec 2018 09:15:20 +0000 (09:15 +0000)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Thu, 13 Dec 2018 10:07:54 +0000 (10:07 +0000)
We currently require that our per-engine reset can be called from any
context, even hardirq, and in the future wish to perform the device
reset without holding struct_mutex (which requires some lockless
shenanigans that demand the lowlevel intel_reset_gpu() be able to be
used in atomic context). Test that we meet the current requirements by
calling i915_reset_engine() from under various atomic contexts.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181213091522.2926-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/selftests/intel_hangcheck.c

index 9e6c9c171ed509931aedcc95b64b4195587c9118..797cf5e6d6d43b03fb4a0b3a8014e6e1b496870e 100644 (file)
@@ -1472,6 +1472,177 @@ err_unlock:
        return err;
 }
 
+/* Enter a preemption-disabled critical section. */
+static void __preempt_begin(void)
+{
+       preempt_disable();
+}
+
+/* Leave the preemption-disabled critical section. */
+static void __preempt_end(void)
+{
+       preempt_enable();
+}
+
+/* Enter a softirq-disabled (bottom halves off) critical section. */
+static void __softirq_begin(void)
+{
+       local_bh_disable();
+}
+
+/* Leave the softirq-disabled critical section. */
+static void __softirq_end(void)
+{
+       local_bh_enable();
+}
+
+/* Enter a local-interrupts-disabled (hardirq-off) critical section. */
+static void __hardirq_begin(void)
+{
+       local_irq_disable();
+}
+
+/* Leave the hardirq-off critical section. */
+static void __hardirq_end(void)
+{
+       local_irq_enable();
+}
+
+/*
+ * One flavour of atomic context for the test: the begin/end callbacks
+ * bracket the reset call with preemption, softirqs or hardirqs disabled.
+ * @name labels the phase in traces and error messages.
+ */
+struct atomic_section {
+       const char *name;
+       void (*critical_section_begin)(void);
+       void (*critical_section_end)(void);
+};
+
+/*
+ * Call i915_reset_engine() while inside the atomic context described by
+ * @p.  The execlists tasklet is disabled first (nosync: we do not wait
+ * for a running instance) so the reset cannot race with submission from
+ * the tasklet.  @mode labels the engine state ("idle"/"active") for the
+ * trace/error output.  Returns 0 on success or the reset's error code.
+ */
+static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
+                                    const struct atomic_section *p,
+                                    const char *mode)
+{
+       struct tasklet_struct * const t = &engine->execlists.tasklet;
+       int err;
+
+       GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
+                 engine->name, mode, p->name);
+
+       tasklet_disable_nosync(t);
+       p->critical_section_begin();
+
+       /* The reset itself must be usable from this atomic context */
+       err = i915_reset_engine(engine, NULL);
+
+       p->critical_section_end();
+       tasklet_enable(t);
+
+       if (err)
+               pr_err("i915_reset_engine(%s:%s) failed under %s\n",
+                      engine->name, mode, p->name);
+
+       return err;
+}
+
+/*
+ * Exercise i915_reset_engine() under atomic context @p twice: first with
+ * the engine idle, then while a deliberately hanging request is running
+ * on it.  After the active reset, wait (with a wedge-on-timeout guard)
+ * for the request to complete to confirm the engine recovered.
+ * Returns 0 on success, -EIO on hang/recovery failure, or a reset error.
+ */
+static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
+                                  const struct atomic_section *p)
+{
+       struct drm_i915_private *i915 = engine->i915;
+       struct i915_request *rq;
+       struct hang h;
+       int err;
+
+       /* Reset the idle engine from inside the atomic section */
+       err = __igt_atomic_reset_engine(engine, p, "idle");
+       if (err)
+               return err;
+
+       err = hang_init(&h, i915);
+       if (err)
+               return err;
+
+       /* Submit a spinning request so the engine is busy during reset */
+       rq = hang_create_request(&h, engine);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto out;
+       }
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+
+       if (wait_until_running(&h, rq)) {
+               err = __igt_atomic_reset_engine(engine, p, "active");
+       } else {
+               pr_err("%s(%s): Failed to start request %llx, at %x\n",
+                      __func__, engine->name,
+                      rq->fence.seqno, hws_seqno(&h, rq));
+               err = -EIO;
+       }
+
+       if (err == 0) {
+               struct igt_wedge_me w;
+
+               /* The reset should have terminated the hanging request */
+               igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout */)
+                       i915_request_wait(rq,
+                                         I915_WAIT_LOCKED,
+                                         MAX_SCHEDULE_TIMEOUT);
+               if (i915_terminally_wedged(&i915->gpu_error))
+                       err = -EIO;
+       }
+
+       i915_request_put(rq);
+out:
+       hang_fini(&h);
+       return err;
+}
+
+/*
+ * Declare the device wedged and then perform a full GPU reset (with the
+ * handoff flag set so i915_reset() proceeds), discarding any in-flight
+ * requests so the test starts/ends from a clean state.
+ */
+static void force_reset(struct drm_i915_private *i915)
+{
+       i915_gem_set_wedged(i915);
+       set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+       i915_reset(i915, 0, NULL);
+}
+
+/*
+ * Selftest entry point: verify that per-engine resets can be performed
+ * from atomic context (preemption, softirqs and hardirqs disabled in
+ * turn) on every engine.  Skipped under GuC submission, where the reset
+ * path differs.  @arg is the drm_i915_private device.
+ */
+static int igt_atomic_reset(void *arg)
+{
+       /* Sentinel-terminated table of atomic contexts to test under */
+       static const struct atomic_section phases[] = {
+               { "preempt", __preempt_begin, __preempt_end },
+               { "softirq", __softirq_begin, __softirq_end },
+               { "hardirq", __hardirq_begin, __hardirq_end },
+               { }
+       };
+       struct drm_i915_private *i915 = arg;
+       int err = 0;
+
+       /* Check that the resets are usable from atomic context */
+
+       if (USES_GUC_SUBMISSION(i915))
+               return 0; /* guc is dead; long live the guc */
+
+       igt_global_reset_lock(i915);
+       mutex_lock(&i915->drm.struct_mutex);
+       intel_runtime_pm_get(i915);
+
+       /* Flush any requests before we get started and check basics */
+       force_reset(i915);
+       if (i915_terminally_wedged(&i915->gpu_error))
+               goto unlock;
+
+       if (intel_has_reset_engine(i915)) {
+               struct intel_engine_cs *engine;
+               enum intel_engine_id id;
+
+               for_each_engine(engine, i915, id) {
+                       const typeof(*phases) *p;
+
+                       /* Walk phases until the empty sentinel entry */
+                       for (p = phases; p->name; p++) {
+                               err = igt_atomic_reset_engine(engine, p);
+                               if (err)
+                                       goto out;
+                       }
+               }
+       }
+
+out:
+       /* As we poke around the guts, do a full reset before continuing. */
+       force_reset(i915);
+
+unlock:
+       intel_runtime_pm_put(i915);
+       mutex_unlock(&i915->drm.struct_mutex);
+       igt_global_reset_unlock(i915);
+
+       return err;
+}
+
 int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
@@ -1487,6 +1658,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
                SUBTEST(igt_reset_evict_ppgtt),
                SUBTEST(igt_reset_evict_fence),
                SUBTEST(igt_handle_error),
+               SUBTEST(igt_atomic_reset),
        };
        bool saved_hangcheck;
        int err;