drm/i915: Apply rps waitboosting for dma_fence_wait_timeout()
author: Chris Wilson <chris@chris-wilson.co.uk>
Wed, 13 Feb 2019 09:25:04 +0000 (09:25 +0000)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Wed, 13 Feb 2019 12:16:39 +0000 (12:16 +0000)
As time goes by, usage of generic ioctls such as drm_syncobj and
sync_file is on the increase, bypassing i915-specific ioctls like
GEM_WAIT. Currently, we only apply waitboosting to our driver ioctls as
we track the file/client and account the waitboosting to them. However,
since commit 7b92c1bd0540 ("drm/i915: Avoid keeping waitboost active for
signaling threads"), we have no longer been applying the client
ratelimiting on waitboosts and so that information has only been used
for debug tracking.

Push the application of waitboosting down to the common
i915_request_wait, and apply it to all foreign fence waits as well.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190213092504.25709-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_pm.c

index 4c4876967cd63ac287db9c2b7ca44e6c4286e03c..ca8fa4461fc9b70ffd3bf743e4f0dbc7048479f2 100644 (file)
@@ -2019,11 +2019,9 @@ static const char *rps_power_to_str(unsigned int power)
 static int i915_rps_boost_info(struct seq_file *m, void *data)
 {
        struct drm_i915_private *dev_priv = node_to_i915(m->private);
-       struct drm_device *dev = &dev_priv->drm;
        struct intel_rps *rps = &dev_priv->gt_pm.rps;
        u32 act_freq = rps->cur_freq;
        intel_wakeref_t wakeref;
-       struct drm_file *file;
 
        with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
                if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
@@ -2057,22 +2055,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
                   intel_gpu_freq(dev_priv, rps->efficient_freq),
                   intel_gpu_freq(dev_priv, rps->boost_freq));
 
-       mutex_lock(&dev->filelist_mutex);
-       list_for_each_entry_reverse(file, &dev->filelist, lhead) {
-               struct drm_i915_file_private *file_priv = file->driver_priv;
-               struct task_struct *task;
-
-               rcu_read_lock();
-               task = pid_task(file->pid, PIDTYPE_PID);
-               seq_printf(m, "%s [%d]: %d boosts\n",
-                          task ? task->comm : "<unknown>",
-                          task ? task->pid : -1,
-                          atomic_read(&file_priv->rps_client.boosts));
-               rcu_read_unlock();
-       }
-       seq_printf(m, "Kernel (anonymous) boosts: %d\n",
-                  atomic_read(&rps->boosts));
-       mutex_unlock(&dev->filelist_mutex);
+       seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
 
        if (INTEL_GEN(dev_priv) >= 6 &&
            rps->enabled &&
index 380b994fe5dc4b7ce8c701f7bec26c4f2ed7fdcf..17fe942eaafa0ab57626676c4da5a4573b7324f1 100644 (file)
@@ -217,10 +217,6 @@ struct drm_i915_file_private {
        } mm;
        struct idr context_idr;
 
-       struct intel_rps_client {
-               atomic_t boosts;
-       } rps_client;
-
        unsigned int bsd_engine;
 
 /*
@@ -3056,8 +3052,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
                         unsigned int flags,
-                        long timeout,
-                        struct intel_rps_client *rps);
+                        long timeout);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
                                  unsigned int flags,
                                  const struct i915_sched_attr *attr);
index bf46c52229a893099b050fc4423ccd2a0cf25a98..5c1b9d44b7d341604a97b55a27dd8288eece2e08 100644 (file)
@@ -416,8 +416,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 static long
 i915_gem_object_wait_fence(struct dma_fence *fence,
                           unsigned int flags,
-                          long timeout,
-                          struct intel_rps_client *rps_client)
+                          long timeout)
 {
        struct i915_request *rq;
 
@@ -435,27 +434,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
        if (i915_request_completed(rq))
                goto out;
 
-       /*
-        * This client is about to stall waiting for the GPU. In many cases
-        * this is undesirable and limits the throughput of the system, as
-        * many clients cannot continue processing user input/output whilst
-        * blocked. RPS autotuning may take tens of milliseconds to respond
-        * to the GPU load and thus incurs additional latency for the client.
-        * We can circumvent that by promoting the GPU frequency to maximum
-        * before we wait. This makes the GPU throttle up much more quickly
-        * (good for benchmarks and user experience, e.g. window animations),
-        * but at a cost of spending more power processing the workload
-        * (bad for battery). Not all clients even want their results
-        * immediately and for them we should just let the GPU select its own
-        * frequency to maximise efficiency. To prevent a single client from
-        * forcing the clocks too high for the whole system, we only allow
-        * each client to waitboost once in a busy period.
-        */
-       if (rps_client && !i915_request_started(rq)) {
-               if (INTEL_GEN(rq->i915) >= 6)
-                       gen6_rps_boost(rq, rps_client);
-       }
-
        timeout = i915_request_wait(rq, flags, timeout);
 
 out:
@@ -468,8 +446,7 @@ out:
 static long
 i915_gem_object_wait_reservation(struct reservation_object *resv,
                                 unsigned int flags,
-                                long timeout,
-                                struct intel_rps_client *rps_client)
+                                long timeout)
 {
        unsigned int seq = __read_seqcount_begin(&resv->seq);
        struct dma_fence *excl;
@@ -487,8 +464,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 
                for (i = 0; i < count; i++) {
                        timeout = i915_gem_object_wait_fence(shared[i],
-                                                            flags, timeout,
-                                                            rps_client);
+                                                            flags, timeout);
                        if (timeout < 0)
                                break;
 
@@ -514,8 +490,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
        }
 
        if (excl && timeout >= 0)
-               timeout = i915_gem_object_wait_fence(excl, flags, timeout,
-                                                    rps_client);
+               timeout = i915_gem_object_wait_fence(excl, flags, timeout);
 
        dma_fence_put(excl);
 
@@ -609,30 +584,19 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
  * @obj: i915 gem object
  * @flags: how to wait (under a lock, for all rendering or just for writes etc)
  * @timeout: how long to wait
- * @rps_client: client (user process) to charge for any waitboosting
  */
 int
 i915_gem_object_wait(struct drm_i915_gem_object *obj,
                     unsigned int flags,
-                    long timeout,
-                    struct intel_rps_client *rps_client)
+                    long timeout)
 {
        might_sleep();
        GEM_BUG_ON(timeout < 0);
 
-       timeout = i915_gem_object_wait_reservation(obj->resv,
-                                                  flags, timeout,
-                                                  rps_client);
+       timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
        return timeout < 0 ? timeout : 0;
 }
 
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
-       struct drm_i915_file_private *fpriv = file->driver_priv;
-
-       return &fpriv->rps_client;
-}
-
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                     struct drm_i915_gem_pwrite *args,
@@ -838,8 +802,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;
 
@@ -891,8 +854,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;
 
@@ -1154,8 +1116,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto out;
 
@@ -1454,8 +1415,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto err;
 
@@ -1553,8 +1513,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   (write_domain ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
        if (err)
                goto out;
 
@@ -1863,8 +1822,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
         */
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto err;
 
@@ -3195,8 +3153,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_PRIORITY |
                                   I915_WAIT_ALL,
-                                  to_wait_timeout(args->timeout_ns),
-                                  to_rps_client(file));
+                                  to_wait_timeout(args->timeout_ns));
 
        if (args->timeout_ns > 0) {
                args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
@@ -3265,7 +3222,7 @@ wait_for_timelines(struct drm_i915_private *i915,
                 * stalls, so allow the gpu to boost to maximum clocks.
                 */
                if (flags & I915_WAIT_FOR_IDLE_BOOST)
-                       gen6_rps_boost(rq, NULL);
+                       gen6_rps_boost(rq);
 
                timeout = i915_request_wait(rq, flags, timeout);
                i915_request_put(rq);
@@ -3360,8 +3317,7 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;
 
@@ -3423,8 +3379,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;
 
@@ -3539,8 +3494,7 @@ restart:
                                           I915_WAIT_INTERRUPTIBLE |
                                           I915_WAIT_LOCKED |
                                           I915_WAIT_ALL,
-                                          MAX_SCHEDULE_TIMEOUT,
-                                          NULL);
+                                          MAX_SCHEDULE_TIMEOUT);
                if (ret)
                        return ret;
 
@@ -3678,8 +3632,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 
        ret = i915_gem_object_wait(obj,
                                   I915_WAIT_INTERRUPTIBLE,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  to_rps_client(file));
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                goto out;
 
@@ -3805,8 +3758,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
                                   I915_WAIT_INTERRUPTIBLE |
                                   I915_WAIT_LOCKED |
                                   (write ? I915_WAIT_ALL : 0),
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+                                  MAX_SCHEDULE_TIMEOUT);
        if (ret)
                return ret;
 
index c2a5c48c7541d6d1bb230933748b210ff036bd78..0acd6baa3c880e6370d47823ae8a2f7fb74545bb 100644 (file)
@@ -68,7 +68,9 @@ static signed long i915_fence_wait(struct dma_fence *fence,
                                   bool interruptible,
                                   signed long timeout)
 {
-       return i915_request_wait(to_request(fence), interruptible, timeout);
+       return i915_request_wait(to_request(fence),
+                                interruptible | I915_WAIT_PRIORITY,
+                                timeout);
 }
 
 static void i915_fence_release(struct dma_fence *fence)
@@ -1136,8 +1138,23 @@ long i915_request_wait(struct i915_request *rq,
        if (__i915_spin_request(rq, state, 5))
                goto out;
 
-       if (flags & I915_WAIT_PRIORITY)
+       /*
+        * This client is about to stall waiting for the GPU. In many cases
+        * this is undesirable and limits the throughput of the system, as
+        * many clients cannot continue processing user input/output whilst
+        * blocked. RPS autotuning may take tens of milliseconds to respond
+        * to the GPU load and thus incurs additional latency for the client.
+        * We can circumvent that by promoting the GPU frequency to maximum
+        * before we sleep. This makes the GPU throttle up much more quickly
+        * (good for benchmarks and user experience, e.g. window animations),
+        * but at a cost of spending more power processing the workload
+        * (bad for battery).
+        */
+       if (flags & I915_WAIT_PRIORITY) {
+               if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
+                       gen6_rps_boost(rq);
                i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+       }
 
        wait.tsk = current;
        if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
index c496b6e0226b025f1ea99bec3523c620dc9edb6c..59544bb5c29467218f3d398161f1169f73bb3188 100644 (file)
@@ -13559,7 +13559,7 @@ static int do_rps_boost(struct wait_queue_entry *_wait,
         * vblank without our intervention, so leave RPS alone.
         */
        if (!i915_request_started(rq))
-               gen6_rps_boost(rq, NULL);
+               gen6_rps_boost(rq);
        i915_request_put(rq);
 
        drm_crtc_vblank_put(wait->crtc);
index dd121966613b00da02eb668c7632df833e8bc4ff..48e89db23c5b7b272df7c692071a90a12d0bdd06 100644 (file)
@@ -2266,7 +2266,7 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
+void gen6_rps_boost(struct i915_request *rq);
 void g4x_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv);
 void ilk_wm_get_hw_state(struct drm_i915_private *dev_priv);
index 279031502d43f8b09b6a97589110977abe227b3d..af265d83101141b6b382ec6ff82204f9b973997a 100644 (file)
@@ -6768,8 +6768,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
        mutex_unlock(&dev_priv->pcu_lock);
 }
 
-void gen6_rps_boost(struct i915_request *rq,
-                   struct intel_rps_client *rps_client)
+void gen6_rps_boost(struct i915_request *rq)
 {
        struct intel_rps *rps = &rq->i915->gt_pm.rps;
        unsigned long flags;
@@ -6798,7 +6797,7 @@ void gen6_rps_boost(struct i915_request *rq,
        if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
                schedule_work(&rps->work);
 
-       atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
+       atomic_inc(&rps->boosts);
 }
 
 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)