drm/amdgpu: Add function to add/remove gws to kfd process
author Oak Zeng <Oak.Zeng@amd.com>
Wed, 8 May 2019 21:14:45 +0000 (16:14 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 28 May 2019 19:44:18 +0000 (14:44 -0500)
The GWS BO is shared between all KFD processes. Add functions to add
the GWS BO to, and remove it from, a KFD process's BO list so that GWS
can be evicted from and restored for the process.

Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index c00c9749406e84e820af8e87e4dbcc2e69530b62..f968bf147c5e0162e9b3d66f037a4e7d9797807a 100644 (file)
@@ -155,6 +155,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
 int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
 void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
                                      enum kgd_engine_type type);
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
index e1cae4a3711348b5bc3d97afba2584377c2c9f38..87177ed37dd2ed592b164d9e6e54a39a771609f3 100644 (file)
@@ -457,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
        mutex_unlock(&process_info->lock);
 }
 
+/* Unlinks mem's validate_list entry from the list it is on. The removal
+ * is done with process_info->lock held.
+ */
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+               struct amdkfd_process_info *process_info)
+{
+       mutex_lock(&process_info->lock);
+       list_del(&mem->validate_list.head);
+       mutex_unlock(&process_info->lock);
+}
+
 /* Initializes user pages. It registers the MMU notifier and validates
  * the userptr BO in the GTT domain.
  *
@@ -1183,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        if (user_addr) {
                ret = init_user_pages(*mem, current->mm, user_addr);
-               if (ret) {
-                       mutex_lock(&avm->process_info->lock);
-                       list_del(&(*mem)->validate_list.head);
-                       mutex_unlock(&avm->process_info->lock);
+               if (ret)
                        goto allocate_init_user_pages_failed;
-               }
        }
 
        if (offset)
@@ -1197,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        return 0;
 
 allocate_init_user_pages_failed:
+       remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
        amdgpu_bo_unref(&bo);
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
@@ -2104,3 +2112,88 @@ ttm_reserve_fail:
        kfree(pd_bo_list);
        return ret;
 }
+
+/* Attaches the shared GWS BO to a KFD process.
+ *
+ * info: the process's struct amdkfd_process_info, passed as void *
+ * gws:  the GWS struct amdgpu_bo, passed as void *
+ * mem:  receives the newly allocated kgd_mem that wraps the GWS BO; it is
+ *       set to NULL when allocation or any later step fails
+ *
+ * A reference is taken on the GWS BO, the new kgd_mem is added to the
+ * process's BO list, the BO is validated in the GWS domain and the process
+ * eviction fence is attached to it. On failure every one of those steps
+ * is undone before returning.
+ *
+ * Returns 0 on success, -EINVAL if any argument is NULL, -ENOMEM if the
+ * kgd_mem cannot be allocated, or the error from reserving/validating
+ * the BO.
+ */
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+       struct amdkfd_process_info *process_info = info;
+       struct amdgpu_bo *gws_bo = gws;
+       int ret;
+
+       /* mem is written through below, so it must be checked as well */
+       if (!info || !gws || !mem)
+               return -EINVAL;
+
+       *mem = kzalloc(sizeof(**mem), GFP_KERNEL);
+       if (!*mem)
+               return -ENOMEM;
+
+       mutex_init(&(*mem)->lock);
+       (*mem)->bo = amdgpu_bo_ref(gws_bo);
+       (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+       (*mem)->process_info = process_info;
+       add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+       amdgpu_sync_create(&(*mem)->sync);
+
+       /* Validate gws bo the first time it is added to process */
+       mutex_lock(&process_info->lock);
+       ret = amdgpu_bo_reserve(gws_bo, false);
+       if (unlikely(ret)) {
+               pr_err("Reserve gws bo failed %d\n", ret);
+               goto bo_reservation_failure;
+       }
+
+       ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+       if (ret) {
+               pr_err("GWS BO validate failed %d\n", ret);
+               goto bo_validation_failure;
+       }
+       /* GWS resource is shared b/t amdgpu and amdkfd
+        * Add process eviction fence to bo so they can
+        * evict each other.
+        */
+       amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+       amdgpu_bo_unreserve(gws_bo);
+       mutex_unlock(&process_info->lock);
+
+       return ret;
+
+bo_validation_failure:
+       amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+       /* Drop the process lock first: the list removal below takes it */
+       mutex_unlock(&process_info->lock);
+       amdgpu_sync_free(&(*mem)->sync);
+       remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+       amdgpu_bo_unref(&gws_bo);
+       mutex_destroy(&(*mem)->lock);
+       kfree(*mem);
+       *mem = NULL;
+       return ret;
+}
+
+/* Detaches the GWS BO from a KFD process and frees its kgd_mem.
+ *
+ * info: the process's struct amdkfd_process_info, passed as void *
+ * mem:  the struct kgd_mem wrapping the GWS BO, passed as void *; it is
+ *       freed on success and must not be used afterwards
+ *
+ * The kgd_mem is taken off the process's validate list, the process
+ * eviction fence is removed from the BO, and the BO reference held by
+ * the kgd_mem is dropped.
+ *
+ * Returns 0 on success, -EINVAL if an argument is NULL, or the error from
+ * reserving the BO. When the reservation fails the kgd_mem is put back on
+ * the list and nothing is freed, so the call may be retried.
+ */
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+       struct amdkfd_process_info *process_info = info;
+       struct kgd_mem *kgd_mem = mem;
+       struct amdgpu_bo *gws_bo;
+       int ret;
+
+       if (!info || !mem)
+               return -EINVAL;
+
+       gws_bo = kgd_mem->bo;
+
+       /* Remove BO from process's validate list so restore worker won't touch
+        * it anymore
+        */
+       remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+       ret = amdgpu_bo_reserve(gws_bo, false);
+       if (unlikely(ret)) {
+               pr_err("Reserve gws bo failed %d\n", ret);
+               /* Nothing else has been torn down yet: the BO still carries
+                * the eviction fence and kgd_mem is still allocated. Put the
+                * entry back, using the same call as
+                * amdgpu_amdkfd_add_gws_to_process, so that a retry does not
+                * list_del an already unlinked entry.
+                */
+               add_kgd_mem_to_kfd_bo_list(kgd_mem, process_info, false);
+               return ret;
+       }
+       amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+                       process_info->eviction_fence);
+       amdgpu_bo_unreserve(gws_bo);
+       amdgpu_sync_free(&kgd_mem->sync);
+       amdgpu_bo_unref(&gws_bo);
+       mutex_destroy(&kgd_mem->lock);
+       kfree(kgd_mem);
+       return 0;
+}