drm/amdgpu: patch cond exec for SDMA
author Monk Liu <monk.liu@amd.com>
Thu, 14 Jan 2016 11:07:38 +0000 (19:07 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 2 May 2016 19:11:09 +0000 (15:11 -0400)
More groundwork for conditional execution on SDMA, which is necessary for preemption.

Signed-off-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
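
For readers skimming the diff: init_cond_exec() below emits a five-dword COND_EXE packet referencing a per-ring memory location (cond_exe_gpu_addr), and patch_cond_exec() later fills in the final dword with the number of dwords the packet covers. A sketch of that layout, for reference only and not part of the commit; the expectation that the engine executes or skips the covered dwords depending on whether the value at cond_exe_gpu_addr matches the reference is an assumption based on how the packet is used here:

/*
 * Illustrative only: the COND_EXE packet as emitted by init_cond_exec()
 * in sdma_v3_0.c below, gathered into one array for readability.  The
 * last dword is a 0x55aa55aa placeholder that patch_cond_exec() later
 * overwrites with the real dword count.
 */
static void cond_exe_packet_layout(struct amdgpu_ring *ring, uint32_t pkt[5])
{
	pkt[0] = SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE);   /* packet opcode           */
	pkt[1] = lower_32_bits(ring->cond_exe_gpu_addr); /* compare address, low    */
	pkt[2] = upper_32_bits(ring->cond_exe_gpu_addr); /* compare address, high   */
	pkt[3] = 1;                                      /* reference value         */
	pkt[4] = 0x55aa55aa;                             /* exec count, patched in  */
}
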
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 49adb451c3bc54c8fd5c17821f8a62e845dab9d7..412fc2f39fa5adffe81919dd15e99d6e0ce0361a 100644
@@ -302,6 +302,8 @@ struct amdgpu_ring_funcs {
        void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
        /* pad the indirect buffer to the necessary number of dw */
        void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+       unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
+       void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
 };
 
 /*
@@ -2182,6 +2184,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
+#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
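
The amdgpu.h hunk adds two optional amdgpu_ring_funcs callbacks plus wrapper macros; they only take effect once a ring backend fills them in. The .funcs wiring is not part of this excerpt, so the following is a minimal sketch of what it would look like for SDMA v3.0, assuming the function names added to sdma_v3_0.c further down:

/* Sketch only -- the actual function-table update is not shown in this diff. */
static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
	/* ... existing callbacks unchanged ... */
	.init_cond_exec  = init_cond_exec,
	.patch_cond_exec = patch_cond_exec,
};
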
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a15d690d90891a9acfa31086ce65ba073bc68e9b..644336d76aca67c4f0582c399ab60c77c74c545f 100644
@@ -124,7 +124,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        struct amdgpu_ctx *ctx, *old_ctx;
        struct amdgpu_vm *vm;
        struct fence *hwf;
-       unsigned i;
+       unsigned i, patch_offset = ~0;
+
        int r = 0;
 
        if (num_ibs == 0)
@@ -149,6 +150,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                return r;
        }
 
+       if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
+               patch_offset = amdgpu_ring_init_cond_exec(ring);
+
        if (vm) {
                /* do context switch */
                amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
@@ -204,6 +208,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        if (f)
                *f = fence_get(hwf);
 
+       if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+               amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
        amdgpu_ring_commit(ring);
        return 0;
 }
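
In amdgpu_ib_schedule() the conditional-execution packet now brackets the whole submission: init_cond_exec() is emitted before the VM flush and returns the write-pointer offset of the placeholder dword, and patch_cond_exec() fills that dword in after the fence has been emitted, just before amdgpu_ring_commit(). The value written is the distance in dwords from the placeholder to the current write pointer, with a wrap-around case. A standalone restatement of that computation, for illustration only (not code from the commit):

/*
 * Illustrative helper: the dword count that patch_cond_exec() stores in
 * the placeholder.  offset and wptr are dword indices into the ring;
 * ring_size is in bytes, so ring_size >> 2 is the ring length in dwords.
 */
static unsigned cond_exec_dw_count(unsigned offset, unsigned wptr,
				   unsigned ring_size)
{
	unsigned cur = wptr - 1;	/* last dword written so far */

	if (cur > offset)
		return cur - offset;			/* no wrap between init and patch */

	return (ring_size >> 2) - offset + cur;		/* write pointer wrapped around */
}
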
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 8c8ca98dd1298af2a1e7b397eb8f9d964e4b81fe..833d2658428fe6bf02f7758de5c27da0e76d0e4a 100644
@@ -452,6 +452,31 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
        amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
 }
 
+unsigned init_cond_exec(struct amdgpu_ring *ring)
+{
+       unsigned ret;
+       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+       amdgpu_ring_write(ring, 1);
+       ret = ring->wptr; /* this is the offset we need to patch later */
+       amdgpu_ring_write(ring, 0x55aa55aa); /* insert dummy here and patch it later */
+       return ret;
+}
+
+void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+       unsigned cur;
+       BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+       cur = ring->wptr - 1;
+       if (likely(cur > offset))
+               ring->ring[offset] = cur - offset;
+       else
+               ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
+}
+
+
 /**
  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
  *