drm/i915: Disable semaphore busywaits on saturated systems

author Chris Wilson <chris@chris-wilson.co.uk>

Sat, 4 May 2019 07:07:07 +0000 (08:07 +0100)

committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Tue, 7 May 2019 09:46:19 +0000 (12:46 +0300)
author Chris Wilson <chris@chris-wilson.co.uk>
Sat, 4 May 2019 07:07:07 +0000 (08:07 +0100)
committer Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Tue, 7 May 2019 09:46:19 +0000 (12:46 +0300)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 20c7c77b3768e1bd27d5929c95a7d458045b759f..ce342f7f7ddbf388ec0d2bc5d7d8eb2d0976fa8a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
         if (i915_gem_context_is_banned(request->gem_context))
                 i915_request_skip(request, -EIO);
  
+       /*
+        * Are we using semaphores when the GPU is already saturated?
+        *
+        * Using semaphores incurs a cost in having the GPU poll a
+        * memory location, busywaiting for it to change. The continual
+        * memory reads can have a noticeable impact on the rest of the
+        * system with the extra bus traffic, stalling the CPU as it too
+        * tries to access memory across the bus (perf stat -e bus-cycles).
+        *
+        * If we installed a semaphore on this request and we only submit
+        * the request after the signaler completed, that indicates the
+        * system is overloaded and using semaphores at this time only
+        * increases the amount of work we are doing. If so, we disable
+        * further use of semaphores until we are idle again, at which
+        * point we optimistically try them again.
+        */
+       if (request->sched.semaphores &&
+           i915_sw_fence_signaled(&request->semaphore))
+               request->hw_context->saturated |= request->sched.semaphores;
+
         /* We may be recursing from the signal callback of another i915 fence */
         spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
  
@@ -813,6 +833,24 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
                                              I915_FENCE_GFP);
  }
  
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+       /*
+        * Returns the mask of engines on which @rq must not install another
+        * semaphore: engines it already busywaits on (rq->sched.semaphores)
+        * plus engines its context has marked as saturated.
+        *
+        * Polling a semaphore causes bus traffic, delaying other users of
+        * both the GPU and CPU. So we use at most one semaphore per source
+        * engine, and none where the request would be submitted too late
+        * to benefit and would only poll an already passed semaphore.
+        *
+        * See the are-we-too-late? check in __i915_request_submit().
+        */
+       return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
  static int
  emit_semaphore_wait(struct i915_request *to,
                     struct i915_request *from,
@@ -826,7 +864,7 @@ emit_semaphore_wait(struct i915_request *to,
         GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
  
         /* Just emit the first semaphore we see as request space is limited. */
-       if (to->sched.semaphores & from->engine->mask)
+       if (already_busywaiting(to) & from->engine->mask)
                 return i915_sw_fence_await_dma_fence(&to->submit,
                                                      &from->fence, 0,
                                                      I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c

index 8931e0fee873d48c8adbdba8c951f0b90ec85cc8..924cc556223ac575703c8aff3b6190ec7cbad25c 100644 (file)
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
         ce->gem_context = ctx;
         ce->engine = engine;
         ce->ops = engine->cops;
+       ce->saturated = 0;
  
         INIT_LIST_HEAD(&ce->signal_link);
         INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h

index 68b4ca1611e0cb68d3a88e7ecf848f32ea70fdc0..339c7437fe82fe55cab2e4dbe929fc97e4c04e52 100644 (file)
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
  #include <linux/types.h>
  
  #include "i915_active_types.h"
+#include "intel_engine_types.h"
  
  struct i915_gem_context;
  struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
         atomic_t pin_count;
         struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
  
+       intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
         /**
          * active_tracker: Active tracker for the external rq activity
          * on this intel_context object.
author	Chris Wilson <chris@chris-wilson.co.uk>
	Sat, 4 May 2019 07:07:07 +0000 (08:07 +0100)
committer	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
	Tue, 7 May 2019 09:46:19 +0000 (12:46 +0300)
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_context.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_context_types.h		patch \| blob \| history