drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()

author Chris Wilson <chris@chris-wilson.co.uk>

Wed, 13 Feb 2019 09:25:04 +0000 (09:25 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Wed, 13 Feb 2019 12:16:39 +0000 (12:16 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 13 Feb 2019 09:25:04 +0000 (09:25 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 13 Feb 2019 12:16:39 +0000 (12:16 +0000)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index 4c4876967cd63ac287db9c2b7ca44e6c4286e03c..ca8fa4461fc9b70ffd3bf743e4f0dbc7048479f2 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2019,11 +2019,9 @@ static const char *rps_power_to_str(unsigned int power)
  static int i915_rps_boost_info(struct seq_file *m, void *data)
  {
         struct drm_i915_private *dev_priv = node_to_i915(m->private);
-       struct drm_device *dev = &dev_priv->drm;
         struct intel_rps *rps = &dev_priv->gt_pm.rps;
         u32 act_freq = rps->cur_freq;
         intel_wakeref_t wakeref;
-       struct drm_file *file;
  
         with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
                 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -2057,22 +2055,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
                    intel_gpu_freq(dev_priv, rps->efficient_freq),
                    intel_gpu_freq(dev_priv, rps->boost_freq));
  
-       mutex_lock(&dev->filelist_mutex);
-       list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-               struct drm_i915_file_private *file_priv = file->driver_priv;
-               struct task_struct *task;
-
-               rcu_read_lock();
-               task = pid_task(file->pid, PIDTYPE_PID);
-               seq_printf(m, "%s [%d]: %d boosts\n",
-                          task ? task->comm : "<unknown>",
-                          task ? task->pid : -1,
-                          atomic_read(&file_priv->rps_client.boosts));
-               rcu_read_unlock();
-       }
-       seq_printf(m, "Kernel (anonymous) boosts: %d\n",
-                  atomic_read(&rps->boosts));
-       mutex_unlock(&dev->filelist_mutex);
+       seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
  
         if (INTEL_GEN(dev_priv) >= 6 &&
             rps->enabled &&
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 380b994fe5dc4b7ce8c701f7bec26c4f2ed7fdcf..17fe942eaafa0ab57626676c4da5a4573b7324f1 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -217,10 +217,6 @@ struct drm_i915_file_private {
         } mm;
         struct idr context_idr;
  
-       struct intel_rps_client {
-               atomic_t boosts;
-       } rps_client;
-
         unsigned int bsd_engine;
  
  /*
@@ -3056,8 +3052,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv);
  vm_fault_t i915_gem_fault(struct vm_fault *vmf);
  int i915_gem_object_wait(struct drm_i915_gem_object *obj,
                          unsigned int flags,
-                        long timeout,
-                        struct intel_rps_client *rps);
+                        long timeout);
  int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
                                   unsigned int flags,
                                   const struct i915_sched_attr *attr);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index bf46c52229a893099b050fc4423ccd2a0cf25a98..5c1b9d44b7d341604a97b55a27dd8288eece2e08 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -416,8 +416,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
  static long
  i915_gem_object_wait_fence(struct dma_fence *fence,
                            unsigned int flags,
-                          long timeout,
-                          struct intel_rps_client *rps_client)
+                          long timeout)
  {
         struct i915_request *rq;
  
@@ -435,27 +434,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
         if (i915_request_completed(rq))
                 goto out;
  
-       /*
-        * This client is about to stall waiting for the GPU. In many cases
-        * this is undesirable and limits the throughput of the system, as
-        * many clients cannot continue processing user input/output whilst
-        * blocked. RPS autotuning may take tens of milliseconds to respond
-        * to the GPU load and thus incurs additional latency for the client.
-        * We can circumvent that by promoting the GPU frequency to maximum
-        * before we wait. This makes the GPU throttle up much more quickly
-        * (good for benchmarks and user experience, e.g. window animations),
-        * but at a cost of spending more power processing the workload
-        * (bad for battery). Not all clients even want their results
-        * immediately and for them we should just let the GPU select its own
-        * frequency to maximise efficiency. To prevent a single client from
-        * forcing the clocks too high for the whole system, we only allow
-        * each client to waitboost once in a busy period.
-        */
-       if (rps_client && !i915_request_started(rq)) {
-               if (INTEL_GEN(rq->i915) >= 6)
-                       gen6_rps_boost(rq, rps_client);
-       }
-
         timeout = i915_request_wait(rq, flags, timeout);
  
  out:
@@ -468,8 +446,7 @@ out:
  static long
  i915_gem_object_wait_reservation(struct reservation_object *resv,
                                  unsigned int flags,
-                                long timeout,
-                                struct intel_rps_client *rps_client)
+                                long timeout)
  {
         unsigned int seq = __read_seqcount_begin(&resv->seq);
         struct dma_fence *excl;
@@ -487,8 +464,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
  
                 for (i = 0; i < count; i++) {
                         timeout = i915_gem_object_wait_fence(shared[i],
-                                                            flags, timeout,
-                                                            rps_client);
+                                                            flags, timeout);
                         if (timeout < 0)
                                 break;
  
@@ -514,8 +490,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
         }
  
         if (excl && timeout >= 0)
-               timeout = i915_gem_object_wait_fence(excl, flags, timeout,
-                                                    rps_client);
+               timeout = i915_gem_object_wait_fence(excl, flags, timeout);
  
         dma_fence_put(excl);
  
@@ -609,30 +584,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
   * @obj: i915 gem object
   * @flags: how to wait (under a lock, for all rendering or just for writes etc)
   * @timeout: how long to wait
- * @rps_client: client (user process) to charge for any waitboosting
   */
  int
  i915_gem_object_wait(struct drm_i915_gem_object *obj,
                      unsigned int flags,
-                    long timeout,
-                    struct intel_rps_client *rps_client)
+                    long timeout)
  {
         might_sleep();
         GEM_BUG_ON(timeout < 0);
  
-       timeout = i915_gem_object_wait_reservation(obj->resv,
-                                                  flags, timeout,
-                                                  rps_client);
+       timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
         return timeout < 0 ? timeout : 0;
  }
  
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
-       struct drm_i915_file_private *fpriv = file->driver_priv;
-
-       return &fpriv->rps_client;
-}
-
  static int
  i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                      struct drm_i915_gem_pwrite *args,
@@ -838,8 +802,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_LOCKED,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 return ret;
  
@@ -891,8 +854,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_LOCKED |
                                    I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 return ret;
  
@@ -1154,8 +1116,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
  
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 goto out;
  
@@ -1454,8 +1415,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 goto err;
  
@@ -1553,8 +1513,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_PRIORITY |
                                    (write_domain ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
         if (err)
                 goto out;
  
@@ -1863,8 +1822,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
          */
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 goto err;
  
@@ -3195,8 +3153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_PRIORITY |
                                    I915_WAIT_ALL,
-                                  to_wait_timeout(args->timeout_ns),
-                                  to_rps_client(file));
+                                  to_wait_timeout(args->timeout_ns));
  
         if (args->timeout_ns > 0) {
                 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
@@ -3265,7 +3222,7 @@ wait_for_timelines(struct drm_i915_private *i915,
                  * stalls, so allow the gpu to boost to maximum clocks.
                  */
                 if (flags & I915_WAIT_FOR_IDLE_BOOST)
-                       gen6_rps_boost(rq, NULL);
+                       gen6_rps_boost(rq);
  
                 timeout = i915_request_wait(rq, flags, timeout);
                 i915_request_put(rq);
@@ -3360,8 +3317,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_LOCKED |
                                    (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 return ret;
  
@@ -3423,8 +3379,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_LOCKED |
                                    (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 return ret;
  
@@ -3539,8 +3494,7 @@ restart:
                                            I915_WAIT_INTERRUPTIBLE |
                                            I915_WAIT_LOCKED |
                                            I915_WAIT_ALL,
-                                          MAX_SCHEDULE_TIMEOUT,
-                                          NULL);
+                                          MAX_SCHEDULE_TIMEOUT);
                 if (ret)
                         return ret;
  
@@ -3678,8 +3632,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
  
         ret = i915_gem_object_wait(obj,
                                    I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 goto out;
  
@@ -3805,8 +3758,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
                                    I915_WAIT_INTERRUPTIBLE |
                                    I915_WAIT_LOCKED |
                                    (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
         if (ret)
                 return ret;
  
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index c2a5c48c7541d6d1bb230933748b210ff036bd78..0acd6baa3c880e6370d47823ae8a2f7fb74545bb 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -68,7 +68,9 @@ static signed long i915_fence_wait(struct dma_fence *fence,
                                    bool interruptible,
                                    signed long timeout)
  {
-       return i915_request_wait(to_request(fence), interruptible, timeout);
+       return i915_request_wait(to_request(fence),
+                                interruptible | I915_WAIT_PRIORITY,
+                                timeout);
  }
  
  static void i915_fence_release(struct dma_fence *fence)
@@ -1136,8 +1138,23 @@ long i915_request_wait(struct i915_request *rq,
         if (__i915_spin_request(rq, state, 5))
                 goto out;
  
-       if (flags & I915_WAIT_PRIORITY)
+       /*
+        * This client is about to stall waiting for the GPU. In many cases
+        * this is undesirable and limits the throughput of the system, as
+        * many clients cannot continue processing user input/output whilst
+        * blocked. RPS autotuning may take tens of milliseconds to respond
+        * to the GPU load and thus incurs additional latency for the client.
+        * We can circumvent that by promoting the GPU frequency to maximum
+        * before we sleep. This makes the GPU throttle up much more quickly
+        * (good for benchmarks and user experience, e.g. window animations),
+        * but at a cost of spending more power processing the workload
+        * (bad for battery).
+        */
+       if (flags & I915_WAIT_PRIORITY) {
+               if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
+                       gen6_rps_boost(rq);
                 i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+       }
  
         wait.tsk = current;
         if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index c496b6e0226b025f1ea99bec3523c620dc9edb6c..59544bb5c29467218f3d398161f1169f73bb3188 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13559,7 +13559,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
          * vblank without our intervention, so leave RPS alone.
          */
         if (!i915_request_started(rq))
-               gen6_rps_boost(rq, NULL);
+               gen6_rps_boost(rq);
         i915_request_put(rq);
  
         drm_crtc_vblank_put(wait->crtc);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h

index dd121966613b00da02eb668c7632df833e8bc4ff..48e89db23c5b7b272df7c692071a90a12d0bdd06 100644 (file)
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -2266,7 +2266,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
  void gen6_rps_busy(struct drm_i915_private *dev_priv);
  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
  void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+void gen6_rps_boost(struct i915_request *rq);
  void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv);
  void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv);
  void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c

index 279031502d43f8b09b6a97589110977abe227b3d..af265d83101141b6b382ec6ff82204f9b973997a 100644 (file)
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6768,8 +6768,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
         mutex_unlock(&dev_priv->pcu_lock);
  }
  
-void gen6_rps_boost(struct i915_request *rq,
-                   struct intel_rps_client *rps_client)
+void gen6_rps_boost(struct i915_request *rq)
  {
         struct intel_rps *rps = &rq->i915->gt_pm.rps;
         unsigned long flags;
@@ -6798,7 +6797,7 @@ void gen6_rps_boost(struct i915_request *rq,
         if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
                 schedule_work(&rps->work);
  
-       atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
+       atomic_inc(&rps->boosts);
  }
  
  int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
author	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 13 Feb 2019 09:25:04 +0000 (09:25 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 13 Feb 2019 12:16:39 +0000 (12:16 +0000)
drivers/gpu/drm/i915/i915_debugfs.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_display.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/intel_pm.c		patch \| blob \| history