drm/radeon: rework VMID handling
author    Christian König <deathsimple@vodafone.de>
          Thu, 9 Aug 2012 14:21:08 +0000 (16:21 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
          Thu, 20 Sep 2012 17:10:39 +0000 (13:10 -0400)
Move binding onto the ring, simplifying handling a bit.

Instead of reserving a VMID from a global bitmap and programming the
page table registers with MMIO writes in the vm.bind() callback, keep
a fence for the last user of each VMID in vm_manager.active[], grab an
id per command submission with radeon_vm_grab_id(), and program the
page table registers from the ring in the vm_flush() callback.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_asic.c
drivers/gpu/drm/radeon/radeon_asic.h
drivers/gpu/drm/radeon/radeon_cs.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_gart.c
drivers/gpu/drm/radeon/si.c

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8e3d70c7c9b72ec218a61d6aae27555b81e6d81b..de378d685803d5f2a40950828dfb2ab78ee08017 100644
@@ -1497,14 +1497,6 @@ void cayman_vm_fini(struct radeon_device *rdev)
 {
 }
 
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
-{
-       WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-       return 0;
-}
-
 #define R600_PTE_VALID     (1 << 0)
 #define R600_PTE_SYSTEM    (1 << 1)
 #define R600_PTE_SNOOPED   (1 << 2)
@@ -1540,10 +1532,20 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
 void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
+       struct radeon_vm *vm = ib->vm;
 
-       if (!ib->vm || ib->vm->id == -1)
+       if (vm == NULL)
                return;
 
+       radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0));
+       radeon_ring_write(ring, 0);
+
+       radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
+       radeon_ring_write(ring, vm->last_pfn);
+
+       radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
+       radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
        /* flush hdp cache */
        radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
        radeon_ring_write(ring, 0x1);
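
Note: the PACKET0 writes above program the same registers that the removed
cayman_vm_bind() hit with WREG32, but from the ring itself, so the bind is
ordered with the commands that use the VM. The (vm->id << 2) offsets work
because the per-context page table registers are consecutive 32-bit words;
a quick model of the addressing (the base offset here is a stand-in, not
the real value from nid.h):

    #include <stdio.h>

    /* hypothetical base; see VM_CONTEXT0_PAGE_TABLE_START_ADDR in nid.h */
    #define PT_START_ADDR_BASE 0x1000

    int main(void)
    {
        /* one 32-bit register per VM context: context N lives at base + 4*N */
        for (unsigned id = 0; id < 8; ++id)
            printf("vm id %u -> reg 0x%04x\n", id,
                   PT_START_ADDR_BASE + (id << 2));
        return 0;
    }
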
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 320355de9c5494d6ca50f0ca1e8a40b7fd605655..617ca45734de77b4c49e37de81530bbe93065462 100644
@@ -253,6 +253,22 @@ static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
        }
 }
 
+static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
+                                          struct radeon_fence *b)
+{
+       if (!a) {
+               return false;
+       }
+
+       if (!b) {
+               return true;
+       }
+
+       BUG_ON(a->ring != b->ring);
+
+       return a->seq < b->seq;
+}
+
 /*
  * Tiling registers
  */
@@ -628,10 +644,13 @@ struct radeon_ring {
 /*
  * VM
  */
+
+#define RADEON_NUM_VM  16
+
 struct radeon_vm {
        struct list_head                list;
        struct list_head                va;
-       int                             id;
+       unsigned                        id;
        unsigned                        last_pfn;
        u64                             pt_gpu_addr;
        u64                             *pt;
@@ -646,7 +665,7 @@ struct radeon_vm {
 struct radeon_vm_manager {
        struct mutex                    lock;
        struct list_head                lru_vm;
-       uint32_t                        use_bitmap;
+       struct radeon_fence             *active[RADEON_NUM_VM];
        struct radeon_sa_manager        sa_manager;
        uint32_t                        max_pfn;
        /* number of VMIDs */
@@ -1117,7 +1136,6 @@ struct radeon_asic {
        struct {
                int (*init)(struct radeon_device *rdev);
                void (*fini)(struct radeon_device *rdev);
-               int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
                uint32_t (*page_flags)(struct radeon_device *rdev,
                                       struct radeon_vm *vm,
                                       uint32_t flags);
@@ -1734,7 +1752,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
-#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
 #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
 #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
@@ -1817,6 +1834,11 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
 int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+                                      struct radeon_vm *vm, int ring);
+void radeon_vm_fence(struct radeon_device *rdev,
+                    struct radeon_vm *vm,
+                    struct radeon_fence *fence);
 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
                            struct radeon_vm *vm,
                            struct radeon_bo *bo,
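
The new radeon_fence_is_earlier() only orders fences from the same ring
(hence the BUG_ON) and treats a NULL b as "no candidate yet", so a running
minimum can start out empty. Its three cases, spelled out:

    a == NULL                   -> false (an empty slot never wins)
    b == NULL                   -> true  (any real fence beats an empty slot)
    same ring, a->seq < b->seq  -> true  (a signals first)

The active[RADEON_NUM_VM] array is what replaces the old use_bitmap: a VMID
is free while its slot is NULL and stays owned until the stored fence signals.
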
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 4a6e39f7ffd130bcbcdb042798809dabfe1a7643..98c586ac1999b933439cdf95a0fe92165a177841 100644
@@ -1375,7 +1375,6 @@ static struct radeon_asic cayman_asic = {
        .vm = {
                .init = &cayman_vm_init,
                .fini = &cayman_vm_fini,
-               .bind = &cayman_vm_bind,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1480,7 +1479,6 @@ static struct radeon_asic trinity_asic = {
        .vm = {
                .init = &cayman_vm_init,
                .fini = &cayman_vm_fini,
-               .bind = &cayman_vm_bind,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1585,7 +1583,6 @@ static struct radeon_asic si_asic = {
        .vm = {
                .init = &si_vm_init,
                .fini = &si_vm_fini,
-               .bind = &si_vm_bind,
                .page_flags = &cayman_vm_page_flags,
                .set_page = &cayman_vm_set_page,
        },
@@ -1599,7 +1596,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
-                       .vm_flush = &cayman_vm_flush,
+                       .vm_flush = &si_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP1_INDEX] = {
                        .ib_execute = &si_ring_ib_execute,
@@ -1610,7 +1607,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
-                       .vm_flush = &cayman_vm_flush,
+                       .vm_flush = &si_vm_flush,
                },
                [CAYMAN_RING_TYPE_CP2_INDEX] = {
                        .ib_execute = &si_ring_ib_execute,
@@ -1621,7 +1618,7 @@ static struct radeon_asic si_asic = {
                        .ring_test = &r600_ring_test,
                        .ib_test = &r600_ib_test,
                        .is_lockup = &si_gpu_is_lockup,
-                       .vm_flush = &cayman_vm_flush,
+                       .vm_flush = &si_vm_flush,
                }
        },
        .irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 11a31d64bacc40bf2ac71c0b3e60b70f3cf81820..25e8d000dac970136188416dd5c87c5b0a5bd3d4 100644
@@ -440,7 +440,6 @@ int cayman_asic_reset(struct radeon_device *rdev);
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
 void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
@@ -470,8 +469,7 @@ int si_irq_set(struct radeon_device *rdev);
 int si_irq_process(struct radeon_device *rdev);
 int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
-void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 uint64_t si_get_gpu_clock(struct radeon_device *rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d4a804b58feba7d998d2ddd249f4d6a01f73d0ab..dc4554e0a7113e21e6be6f909525e8e97fbdab49 100644
@@ -485,6 +485,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
        }
        radeon_cs_sync_rings(parser);
        radeon_cs_sync_to(parser, vm->last_flush);
+       radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
 
        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
@@ -493,13 +494,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
                r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        }
 
-out:
        if (!r) {
-               if (vm->fence) {
-                       radeon_fence_unref(&vm->fence);
-               }
-               vm->fence = radeon_fence_ref(parser->ib.fence);
+               radeon_vm_fence(rdev, vm, parser->ib.fence);
        }
+
+out:
        mutex_unlock(&vm->mutex);
        mutex_unlock(&rdev->vm_manager.lock);
        return r;
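
Taken together, the submission path in radeon_cs_ib_vm_chunk() now runs
roughly as follows (a condensed sketch of the hunk above, locking and
error paths omitted):

    /* may hand out a VMID still owned by someone else */
    fence = radeon_vm_grab_id(rdev, vm, parser->ring);
    radeon_cs_sync_to(parser, vm->last_flush); /* order against last PT update */
    radeon_cs_sync_to(parser, fence);          /* wait out the id's old owner */
    r = radeon_ib_schedule(rdev, &parser->ib, NULL); /* vm_flush() runs on the ring */
    if (!r)
        radeon_vm_fence(rdev, vm, parser->ib.fence); /* id busy until this signals */

The fence returned by radeon_vm_grab_id() is the one left behind by the
previous owner of a reused VMID; syncing to it keeps the old owner's work
and the new page table programming from overlapping.
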
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c78f0346dfe4bbb1f106e18e67af2c99e8e0e0f3..331a952c9b53f403315cd11c79d0cb20b1cd9a7b 100644
@@ -1018,7 +1018,6 @@ int radeon_device_init(struct radeon_device *rdev,
                return r;
        /* initialize vm here */
        mutex_init(&rdev->vm_manager.lock);
-       rdev->vm_manager.use_bitmap = 1;
        rdev->vm_manager.max_pfn = 1 << 20;
        INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
 
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 125b7c31fafc48bcbbfa26c5269d2de013e4746b..0fd0ba9236a6101f282c43fbb935a3bbdb26cc91 100644
@@ -437,7 +437,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
        int r;
 
        if (!rdev->vm_manager.enabled) {
-               /* mark first vm as always in use, it's the system one */
                /* allocate enough for 2 full VM pts */
                r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
                                              rdev->vm_manager.max_pfn * 8 * 2,
@@ -461,7 +460,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 
        /* restore page table */
        list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-               if (vm->id == -1)
+               if (vm->sa_bo == NULL)
                        continue;
 
                list_for_each_entry(bo_va, &vm->va, vm_list) {
@@ -475,11 +474,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
                                DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
                        }
                }
-
-               r = radeon_asic_vm_bind(rdev, vm, vm->id);
-               if (r) {
-                       DRM_ERROR("Failed to bind vm %d!\n", vm->id);
-               }
        }
        return 0;
 }
@@ -500,10 +494,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 {
        struct radeon_bo_va *bo_va;
 
-       if (vm->id == -1) {
-               return;
-       }
-
        /* wait for vm use to end */
        while (vm->fence) {
                int r;
@@ -523,9 +513,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
        radeon_fence_unref(&vm->last_flush);
 
        /* hw unbind */
-       rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
        list_del_init(&vm->list);
-       vm->id = -1;
        radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
        vm->pt = NULL;
 
@@ -544,6 +532,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 void radeon_vm_manager_fini(struct radeon_device *rdev)
 {
        struct radeon_vm *vm, *tmp;
+       int i;
 
        if (!rdev->vm_manager.enabled)
                return;
@@ -553,6 +542,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
        list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
                radeon_vm_unbind_locked(rdev, vm);
        }
+       for (i = 0; i < RADEON_NUM_VM; ++i) {
+               radeon_fence_unref(&rdev->vm_manager.active[i]);
+       }
        radeon_asic_vm_fini(rdev);
        mutex_unlock(&rdev->vm_manager.lock);
 
@@ -593,14 +585,13 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
 int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
 {
        struct radeon_vm *vm_evict;
-       unsigned i;
-       int id = -1, r;
+       int r;
 
        if (vm == NULL) {
                return -EINVAL;
        }
 
-       if (vm->id != -1) {
+       if (vm->sa_bo != NULL) {
                /* update lru */
                list_del_init(&vm->list);
                list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
@@ -623,33 +614,86 @@ retry:
        vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
        memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
 
-retry_id:
-       /* search for free vm */
-       for (i = 0; i < rdev->vm_manager.nvm; i++) {
-               if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
-                       id = i;
-                       break;
+       list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+       return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
+                                      &rdev->ring_tmp_bo.bo->tbo.mem);
+}
+
+/**
+ * radeon_vm_grab_id - allocate the next free VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ *
+ * Allocate an id for the vm (cayman+).
+ * Returns the fence we need to sync to (if any).
+ *
+ * Global and local mutex must be locked!
+ */
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+                                      struct radeon_vm *vm, int ring)
+{
+       struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+       unsigned choices[2] = {};
+       unsigned i;
+
+       /* check if the id is still valid */
+       if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
+               return NULL;
+
+       /* we definitely need to flush */
+       radeon_fence_unref(&vm->last_flush);
+
+       /* skip over VMID 0, since it is the system VM */
+       for (i = 1; i < rdev->vm_manager.nvm; ++i) {
+               struct radeon_fence *fence = rdev->vm_manager.active[i];
+
+               if (fence == NULL) {
+                       /* found a free one */
+                       vm->id = i;
+                       return NULL;
+               }
+
+               if (radeon_fence_is_earlier(fence, best[fence->ring])) {
+                       best[fence->ring] = fence;
+                       choices[fence->ring == ring ? 0 : 1] = i;
                }
-       }
-       /* evict vm if necessary */
-       if (id == -1) {
-               vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
-               radeon_vm_unbind(rdev, vm_evict);
-               goto retry_id;
        }
 
-       /* do hw bind */
-       r = radeon_asic_vm_bind(rdev, vm, id);
-       radeon_fence_unref(&vm->last_flush);
-       if (r) {
-               radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
-               return r;
+       for (i = 0; i < 2; ++i) {
+               if (choices[i]) {
+                       vm->id = choices[i];
+                       return rdev->vm_manager.active[choices[i]];
+               }
        }
-       rdev->vm_manager.use_bitmap |= 1 << id;
-       vm->id = id;
-       list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-       return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
-                                      &rdev->ring_tmp_bo.bo->tbo.mem);
+
+       /* should never happen */
+       BUG();
+       return NULL;
+}
+
+/**
+ * radeon_vm_fence - remember fence for vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to fence
+ * @fence: fence to remember
+ *
+ * Fence the vm (cayman+).
+ * Set the fence used to protect page table and id.
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_fence(struct radeon_device *rdev,
+                    struct radeon_vm *vm,
+                    struct radeon_fence *fence)
+{
+       radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
+       rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+
+       radeon_fence_unref(&vm->fence);
+       vm->fence = radeon_fence_ref(fence);
 }
 
 /* object have to be reserved */
@@ -806,7 +850,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
        uint32_t flags;
 
        /* nothing to do if vm isn't bound */
-       if (vm->id == -1)
+       if (vm->sa_bo == NULL)
                return 0;
 
        bo_va = radeon_bo_va(bo, vm);
@@ -928,7 +972,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
        int r;
 
-       vm->id = -1;
+       vm->id = 0;
        vm->fence = NULL;
        mutex_init(&vm->mutex);
        INIT_LIST_HEAD(&vm->list);
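
Note that vm->id = 0 now means "no id yet" (VMID 0 is the system VM the
allocator never hands out), replacing the old -1 sentinel. The selection
policy in radeon_vm_grab_id() above: keep the current id if its active[]
fence is still ours; otherwise take any free id; and if none is free,
reuse the id whose last-use fence retires first, preferring ids last used
on the submitting ring, since same-ring fences are already ordered and
cost nothing extra to sync to. A standalone model of that policy (plain
C with simplified types; a sketch, not kernel code):

    #include <stdio.h>

    #define NUM_RINGS 3
    #define NUM_VM    16

    struct fence { int ring; unsigned seq; };

    /* returns the chosen id; *wait is the fence to sync to, or NULL */
    static int grab_id(struct fence *active[NUM_VM], int ring,
                       struct fence **wait)
    {
        struct fence *best[NUM_RINGS] = { NULL };
        int choices[2] = { 0, 0 }; /* [0] = same ring, [1] = other ring */
        int i;

        *wait = NULL;
        for (i = 1; i < NUM_VM; ++i) { /* id 0 is the system VM, skip it */
            struct fence *f = active[i];

            if (f == NULL)
                return i; /* free id, no sync needed */

            /* track the earliest-retiring fence per ring */
            if (best[f->ring] == NULL || f->seq < best[f->ring]->seq) {
                best[f->ring] = f;
                choices[f->ring == ring ? 0 : 1] = i;
            }
        }
        for (i = 0; i < 2; ++i) {
            if (choices[i]) {
                *wait = active[choices[i]];
                return choices[i];
            }
        }
        return -1; /* unreachable while NUM_VM > 1 */
    }

    int main(void)
    {
        struct fence f1 = { 0, 10 }, f2 = { 1, 5 };
        struct fence *active[NUM_VM] = { NULL };
        struct fence *wait;
        int id;

        active[1] = &f1; /* id 1 last used on ring 0, seq 10 */
        active[2] = &f2; /* id 2 last used on ring 1, seq 5  */

        id = grab_id(active, 0, &wait); /* ids 3..15 free -> id 3, no wait */
        printf("got id %d, wait %p\n", id, (void *)wait);
        return 0;
    }
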
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 4016927b268dd83a1dda4c255298c3b555ca7b0f..51a471dc319cb27aedbfc3b0c0ee71764dbf3cd0 100644
@@ -2789,14 +2789,30 @@ void si_vm_fini(struct radeon_device *rdev)
 {
 }
 
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-       if (id < 8)
-               WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-       else
-               WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
-                      vm->pt_gpu_addr >> 12);
-       return 0;
+       struct radeon_ring *ring = &rdev->ring[ib->ring];
+       struct radeon_vm *vm = ib->vm;
+
+       if (vm == NULL)
+               return;
+
+       if (vm->id < 8) {
+               radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+                                               + (vm->id << 2), 0));
+       } else {
+               radeon_ring_write(ring, PACKET0(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+                                               + ((vm->id - 8) << 2), 0));
+       }
+       radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
+       /* flush hdp cache */
+       radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+       radeon_ring_write(ring, 0x1);
+
+       /* bits 0-15 are the VM contexts0-15 */
+       radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
+       radeon_ring_write(ring, 1 << ib->vm->id);
 }
 
 /*
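
On SI the sixteen VM contexts have their page table base registers split
across two banks of eight (contexts 0-7 and 8-15), hence the vm->id < 8
branch above; the invalidate request then flips the single bit matching
the id. A sketch of the bank selection (register names as in the code
above, exact offsets not shown here):

    if (vm->id < 8)
        reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2);
    else
        reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2);
    /* write pt_gpu_addr >> 12 to reg: the page table's GPU address
     * stored as a 4 KiB page frame number */
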