drm/amdkfd: Add support for doorbell BOs
author Felix Kuehling <Felix.Kuehling@amd.com>
Wed, 21 Nov 2018 02:44:27 +0000 (21:44 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 7 Dec 2018 23:14:00 +0000 (18:14 -0500)
This allows user mode to map doorbell pages into GPUVM address space.
That way GPUs can submit to user mode queues (self-dispatch).

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index a0a500d458864aac40e406f04e01a6c89cdf181e..be1ab43473c6c727bd1832efa3d5b7c09edffed2 100644 (file)
@@ -887,6 +887,24 @@ update_gpuvm_pte_failed:
        return ret;
 }
 
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+{ /* Returns a one-entry sg_table (dma_address = addr, length = size), or NULL if an allocation fails. */
+       struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); /* the struct itself; its entry comes from sg_alloc_table() below */
+
+       if (!sg)
+               return NULL;
+       if (sg_alloc_table(sg, 1, GFP_KERNEL)) { /* a nonzero return is treated as failure */
+               kfree(sg); /* undo the kmalloc above so nothing leaks */
+               return NULL;
+       }
+       sg->sgl->dma_address = addr; /* assigned directly from the caller's addr; no mapping call is made here */
+       sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+       sg->sgl->dma_length = size; /* same size as length above */
+#endif
+       return sg; /* the callers in this patch release it with sg_free_table() followed by kfree() */
+}
+
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
        struct amdgpu_vm *peer_vm;
@@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+       enum ttm_bo_type bo_type = ttm_bo_type_device;
+       struct sg_table *sg = NULL;
        uint64_t user_addr = 0;
        struct amdgpu_bo *bo;
        struct amdgpu_bo_param bp;
@@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                if (!offset || !*offset)
                        return -EINVAL;
                user_addr = *offset;
+       } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+               domain = AMDGPU_GEM_DOMAIN_GTT;
+               alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+               bo_type = ttm_bo_type_sg;
+               alloc_flags = 0;
+               if (size > UINT_MAX)
+                       return -EINVAL;
+               sg = create_doorbell_sg(*offset, size);
+               if (!sg)
+                       return -ENOMEM;
        } else {
                return -EINVAL;
        }
 
        *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-       if (!*mem)
-               return -ENOMEM;
+       if (!*mem) {
+               ret = -ENOMEM;
+               goto err;
+       }
        INIT_LIST_HEAD(&(*mem)->bo_va_list);
        mutex_init(&(*mem)->lock);
        (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        amdgpu_sync_create(&(*mem)->sync);
 
-       ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false);
+       ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
        if (ret) {
                pr_debug("Insufficient system memory\n");
                goto err_reserve_limit;
@@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        bp.byte_align = byte_align;
        bp.domain = alloc_domain;
        bp.flags = alloc_flags;
-       bp.type = ttm_bo_type_device;
+       bp.type = bo_type;
        bp.resv = NULL;
        ret = amdgpu_bo_create(adev, &bp, &bo);
        if (ret) {
@@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                                domain_string(alloc_domain), ret);
                goto err_bo_create;
        }
+       if (bo_type == ttm_bo_type_sg) {
+               bo->tbo.sg = sg;
+               bo->tbo.ttm->sg = sg;
+       }
        bo->kfd_bo = *mem;
        (*mem)->bo = bo;
        if (user_addr)
@@ -1290,10 +1326,15 @@ allocate_init_user_pages_failed:
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       unreserve_mem_limit(adev, size, alloc_domain, false);
+       unreserve_mem_limit(adev, size, alloc_domain, !!sg);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        kfree(*mem);
+err:
+       if (sg) {
+               sg_free_table(sg);
+               kfree(sg);
+       }
        return ret;
 }
 
@@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        /* Free the sync object */
        amdgpu_sync_free(&mem->sync);
 
+       /* If the SG is not NULL, it's one we created for a doorbell
+        * BO. We need to free it.
+        */
+       if (mem->bo->tbo.sg) {
+               sg_free_table(mem->bo->tbo.sg);
+               kfree(mem->bo->tbo.sg);
+       }
+
        /* Free the BO*/
        amdgpu_bo_unref(&mem->bo);
        mutex_destroy(&mem->lock);
index ae3ae0fb260255d3fe948744713cd8d313710d2f..3623538baf6fc9c20dd79167826ded7d52acbc9e 100644 (file)
@@ -1274,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                return -EINVAL;
        }
 
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+               if (args->size != kfd_doorbell_process_slice(dev))
+                       return -EINVAL;
+               offset = kfd_get_process_doorbells(dev, p);
+       }
+
        mutex_lock(&p->mutex);
 
        pdd = kfd_bind_process_to_device(dev, p);
index 58ac0b90c310e721ea68ba2eae2b04473cf56e90..8154d67388ccef99185431ce8d04b7b16af59762 100644 (file)
@@ -188,8 +188,8 @@ struct tile_config {
  */
 #define ALLOC_MEM_FLAGS_VRAM           (1 << 0)
 #define ALLOC_MEM_FLAGS_GTT            (1 << 1)
-#define ALLOC_MEM_FLAGS_USERPTR                (1 << 2) /* TODO */
-#define ALLOC_MEM_FLAGS_DOORBELL       (1 << 3) /* TODO */
+#define ALLOC_MEM_FLAGS_USERPTR                (1 << 2)
+#define ALLOC_MEM_FLAGS_DOORBELL       (1 << 3)
 
 /*
  * Allocation flags attributes/access options.