static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
- struct drm_device *dev = &dev_priv->drm;
struct intel_rps *rps = &dev_priv->gt_pm.rps;
u32 act_freq = rps->cur_freq;
intel_wakeref_t wakeref;
- struct drm_file *file;
with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
intel_gpu_freq(dev_priv, rps->efficient_freq),
intel_gpu_freq(dev_priv, rps->boost_freq));
- mutex_lock(&dev->filelist_mutex);
- list_for_each_entry_reverse(file, &dev->filelist, lhead) {
- struct drm_i915_file_private *file_priv = file->driver_priv;
- struct task_struct *task;
-
- rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_PID);
- seq_printf(m, "%s [%d]: %d boosts\n",
- task ? task->comm : "<unknown>",
- task ? task->pid : -1,
- atomic_read(&file_priv->rps_client.boosts));
- rcu_read_unlock();
- }
- seq_printf(m, "Kernel (anonymous) boosts: %d\n",
- atomic_read(&rps->boosts));
- mutex_unlock(&dev->filelist_mutex);
+ seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
if (INTEL_GEN(dev_priv) >= 6 &&
rps->enabled &&
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
struct i915_request *rq;
if (i915_request_completed(rq))
goto out;
- /*
- * This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (rps_client && !i915_request_started(rq)) {
- if (INTEL_GEN(rq->i915) >= 6)
- gen6_rps_boost(rq, rps_client);
- }
-
timeout = i915_request_wait(rq, flags, timeout);
out:
static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
unsigned int seq = __read_seqcount_begin(&resv->seq);
struct dma_fence *excl;
for (i = 0; i < count; i++) {
timeout = i915_gem_object_wait_fence(shared[i],
- flags, timeout,
- rps_client);
+ flags, timeout);
if (timeout < 0)
break;
}
if (excl && timeout >= 0)
- timeout = i915_gem_object_wait_fence(excl, flags, timeout,
- rps_client);
+ timeout = i915_gem_object_wait_fence(excl, flags, timeout);
dma_fence_put(excl);
* @obj: i915 gem object
* @flags: how to wait (under a lock, for all rendering or just for writes etc)
* @timeout: how long to wait
- * @rps_client: client (user process) to charge for any waitboosting
*/
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
unsigned int flags,
- long timeout,
- struct intel_rps_client *rps_client)
+ long timeout)
{
/* might_sleep() marks this as a path that may block while waiting. */
might_sleep();
/* A negative timeout from the caller is treated as a programming error. */
GEM_BUG_ON(timeout < 0);
/*
 * Wait on the object's reservation object (obj->resv), forwarding the
 * caller's flags and timeout; the result overwrites 'timeout'.
 */
- timeout = i915_gem_object_wait_reservation(obj->resv,
- flags, timeout,
- rps_client);
+ timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
/*
 * A negative result is handed back unchanged as the error; any
 * non-negative result is reported to the caller as 0.
 */
return timeout < 0 ? timeout : 0;
}
-static struct intel_rps_client *to_rps_client(struct drm_file *file)
-{
- struct drm_i915_file_private *fpriv = file->driver_priv;
-
- return &fpriv->rps_client;
-}
-
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (err)
goto out;
*/
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
I915_WAIT_ALL,
- to_wait_timeout(args->timeout_ns),
- to_rps_client(file));
+ to_wait_timeout(args->timeout_ns));
if (args->timeout_ns > 0) {
args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
* stalls, so allow the gpu to boost to maximum clocks.
*/
if (flags & I915_WAIT_FOR_IDLE_BOOST)
- gen6_rps_boost(rq, NULL);
+ gen6_rps_boost(rq);
timeout = i915_request_wait(rq, flags, timeout);
i915_request_put(rq);
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT,
- to_rps_client(file));
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
goto out;
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_LOCKED |
(write ? I915_WAIT_ALL : 0),
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ MAX_SCHEDULE_TIMEOUT);
if (ret)
return ret;
bool interruptible,
signed long timeout)
{
- return i915_request_wait(to_request(fence), interruptible, timeout);
+ return i915_request_wait(to_request(fence),
+ interruptible | I915_WAIT_PRIORITY,
+ timeout);
}
static void i915_fence_release(struct dma_fence *fence)
if (__i915_spin_request(rq, state, 5))
goto out;
- if (flags & I915_WAIT_PRIORITY)
+ /*
+ * This client is about to stall waiting for the GPU. In many cases
+ * this is undesirable and limits the throughput of the system, as
+ * many clients cannot continue processing user input/output whilst
+ * blocked. RPS autotuning may take tens of milliseconds to respond
+ * to the GPU load and thus incurs additional latency for the client.
+ * We can circumvent that by promoting the GPU frequency to maximum
+ * before we sleep. This makes the GPU throttle up much more quickly
+ * (good for benchmarks and user experience, e.g. window animations),
+ * but at a cost of spending more power processing the workload
+ * (bad for battery).
+ */
+ if (flags & I915_WAIT_PRIORITY) {
+ if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
+ gen6_rps_boost(rq);
i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+ }
wait.tsk = current;
if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))