drm/i915/gvt: Separate cmd scan from request allocation
authorfred gao <fred.gao@intel.com>
Fri, 18 Aug 2017 07:41:06 +0000 (15:41 +0800)
committerZhenyu Wang <zhenyuw@linux.intel.com>
Fri, 8 Sep 2017 06:21:14 +0000 (14:21 +0800)
Currently i915 request structure and shadow ring buffer are allocated
before command scan, so it will have to restore to previous states once
any error happens afterwards in the long dispatch_workload path.

This patch is to introduce a reserved ring buffer created at the beginning
of vGPU initialization. Workload will be coped to this reserved buffer and
be scanned first, the i915 request and shadow ring buffer are only
allocated after the result of scan is successful.

To balance the memory usage and buffer alloc time, the coming bigger ring
buffer will be reallocated and kept until more bigger buffer is coming.

v2:
- use kmalloc for the smaller ring buffer, realloc if required. (Zhenyu)

v3:
- remove the dynamically allocated ring buffer. (Zhenyu)

v4:
- code style polish.
- kfree previous allocated buffer once kmalloc failed. (Zhenyu)

Signed-off-by: fred gao <fred.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/execlist.c
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/scheduler.c

index 72b97ce525e8c713b067630b1d0978f3ccdc0632..e53efc0524f454b9da4bce1dbf000f02f3aa6a86 100644 (file)
@@ -2603,7 +2603,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 {
        struct intel_vgpu *vgpu = workload->vgpu;
        unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
-       u32 *cs;
+       void *shadow_ring_buffer_va;
+       int ring_id = workload->ring_id;
        int ret;
 
        guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2616,34 +2617,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
        gma_tail = workload->rb_start + workload->rb_tail;
        gma_top = workload->rb_start + guest_rb_size;
 
-       /* allocate shadow ring buffer */
-       cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
-       if (IS_ERR(cs))
-               return PTR_ERR(cs);
+       if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) {
+               void *va = vgpu->reserve_ring_buffer_va[ring_id];
+               /* realloc the new ring buffer if needed */
+               vgpu->reserve_ring_buffer_va[ring_id] =
+                       krealloc(va, workload->rb_len, GFP_KERNEL);
+               if (!vgpu->reserve_ring_buffer_va[ring_id]) {
+                       gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+                       return -ENOMEM;
+               }
+               vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len;
+       }
+
+       shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id];
 
        /* get shadow ring buffer va */
-       workload->shadow_ring_buffer_va = cs;
+       workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
 
        /* head > tail --> copy head <-> top */
        if (gma_head > gma_tail) {
                ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
-                                     gma_head, gma_top, cs);
+                                     gma_head, gma_top, shadow_ring_buffer_va);
                if (ret < 0) {
                        gvt_vgpu_err("fail to copy guest ring buffer\n");
                        return ret;
                }
-               cs += ret / sizeof(u32);
+               shadow_ring_buffer_va += ret;
                gma_head = workload->rb_start;
        }
 
        /* copy head or start <-> tail */
-       ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
+       ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
+                               shadow_ring_buffer_va);
        if (ret < 0) {
                gvt_vgpu_err("fail to copy guest ring buffer\n");
                return ret;
        }
-       cs += ret / sizeof(u32);
-       intel_ring_advance(workload->req, cs);
        return 0;
 }
 
index 91b4300f3b394a59e6ad2f19ea5fae9834d82806..1e2c27704be548a322e6b0a68fad9a1d85660dff 100644 (file)
@@ -820,10 +820,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
 
 void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
 {
+       enum intel_engine_id i;
+       struct intel_engine_cs *engine;
+
        clean_workloads(vgpu, ALL_ENGINES);
        kmem_cache_destroy(vgpu->workloads);
+
+       for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+               kfree(vgpu->reserve_ring_buffer_va[i]);
+               vgpu->reserve_ring_buffer_va[i] = NULL;
+               vgpu->reserve_ring_buffer_size[i] = 0;
+       }
+
 }
 
+#define RESERVE_RING_BUFFER_SIZE               ((1 * PAGE_SIZE)/8)
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 {
        enum intel_engine_id i;
@@ -843,7 +854,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
        if (!vgpu->workloads)
                return -ENOMEM;
 
+       /* each ring has a shadow ring buffer until vgpu destroyed */
+       for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+               vgpu->reserve_ring_buffer_va[i] =
+                       kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL);
+               if (!vgpu->reserve_ring_buffer_va[i]) {
+                       gvt_vgpu_err("fail to alloc reserve ring buffer\n");
+                       goto out;
+               }
+               vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE;
+       }
        return 0;
+out:
+       for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+               if (vgpu->reserve_ring_buffer_size[i]) {
+                       kfree(vgpu->reserve_ring_buffer_va[i]);
+                       vgpu->reserve_ring_buffer_va[i] = NULL;
+                       vgpu->reserve_ring_buffer_size[i] = 0;
+               }
+       }
+       return -ENOMEM;
 }
 
 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
index 2fe22e1b517c3d7910fc6e8b7b04b99b9b4a86c9..031868f08e1c9698b294f60e58ac8a6cabeb55e3 100644 (file)
@@ -166,6 +166,9 @@ struct intel_vgpu {
        struct list_head workload_q_head[I915_NUM_ENGINES];
        struct kmem_cache *workloads;
        atomic_t running_workload_num;
+       /* 1/2K for each reserve ring buffer */
+       void *reserve_ring_buffer_va[I915_NUM_ENGINES];
+       int reserve_ring_buffer_size[I915_NUM_ENGINES];
        DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
        struct i915_gem_context *shadow_ctx;
        DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
index 391800d2067b7614223487a47f3d04e2d74b412c..0e480f59f659f41905b1ca5029e368736225f9d4 100644 (file)
@@ -201,6 +201,34 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
        ce->lrc_desc = desc;
 }
 
+static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
+{
+       struct intel_vgpu *vgpu = workload->vgpu;
+       void *shadow_ring_buffer_va;
+       u32 *cs;
+
+       /* allocate shadow ring buffer */
+       cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+       if (IS_ERR(cs)) {
+               gvt_vgpu_err("fail to alloc size =%ld shadow  ring buffer\n",
+                       workload->rb_len);
+               return PTR_ERR(cs);
+       }
+
+       shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
+
+       /* get shadow ring buffer va */
+       workload->shadow_ring_buffer_va = cs;
+
+       memcpy(cs, shadow_ring_buffer_va,
+                       workload->rb_len);
+
+       cs += workload->rb_len / sizeof(u32);
+       intel_ring_advance(workload->req, cs);
+
+       return 0;
+}
+
 /**
  * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
  * shadow it as well, include ringbuffer,wa_ctx and ctx.
@@ -214,8 +242,10 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
        int ring_id = workload->ring_id;
        struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
        struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+       struct intel_engine_cs *engine = dev_priv->engine[ring_id];
        struct drm_i915_gem_request *rq;
        struct intel_vgpu *vgpu = workload->vgpu;
+       struct intel_ring *ring;
        int ret;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -231,17 +261,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
                shadow_context_descriptor_update(shadow_ctx,
                                        dev_priv->engine[ring_id]);
 
-       rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
-       if (IS_ERR(rq)) {
-               gvt_vgpu_err("fail to allocate gem request\n");
-               ret = PTR_ERR(rq);
-               goto out;
-       }
-
-       gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
-
-       workload->req = i915_gem_request_get(rq);
-
        ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
        if (ret)
                goto out;
@@ -253,10 +272,37 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
                        goto out;
        }
 
+       /* pin shadow context by gvt even the shadow context will be pinned
+        * when i915 alloc request. That is because gvt will update the guest
+        * context from shadow context when workload is completed, and at that
+        * moment, i915 may already unpined the shadow context to make the
+        * shadow_ctx pages invalid. So gvt need to pin itself. After update
+        * the guest context, gvt can unpin the shadow_ctx safely.
+        */
+       ring = engine->context_pin(engine, shadow_ctx);
+       if (IS_ERR(ring)) {
+               ret = PTR_ERR(ring);
+               gvt_vgpu_err("fail to pin shadow context\n");
+               goto out;
+       }
+
        ret = populate_shadow_context(workload);
        if (ret)
                goto out;
 
+       rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
+       if (IS_ERR(rq)) {
+               gvt_vgpu_err("fail to allocate gem request\n");
+               ret = PTR_ERR(rq);
+               goto out;
+       }
+
+       gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);
+
+       workload->req = i915_gem_request_get(rq);
+       ret = copy_workload_to_ring_buffer(workload);
+       if (ret)
+               goto out;
        workload->shadowed = true;
 
 out:
@@ -269,8 +315,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
        struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
        struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine = dev_priv->engine[ring_id];
-       struct intel_vgpu *vgpu = workload->vgpu;
-       struct intel_ring *ring;
        int ret = 0;
 
        gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -288,20 +332,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
                        goto out;
        }
 
-       /* pin shadow context by gvt even the shadow context will be pinned
-        * when i915 alloc request. That is because gvt will update the guest
-        * context from shadow context when workload is completed, and at that
-        * moment, i915 may already unpined the shadow context to make the
-        * shadow_ctx pages invalid. So gvt need to pin itself. After update
-        * the guest context, gvt can unpin the shadow_ctx safely.
-        */
-       ring = engine->context_pin(engine, shadow_ctx);
-       if (IS_ERR(ring)) {
-               ret = PTR_ERR(ring);
-               gvt_vgpu_err("fail to pin shadow context\n");
-               goto out;
-       }
-
 out:
        if (ret)
                workload->status = ret;