drm/amdgpu: Refactor amdgpu_xgmi_add_device
author Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Mon, 12 Nov 2018 21:16:03 +0000 (16:16 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 28 Nov 2018 20:55:35 +0000 (15:55 -0500)
This is prep work for updating each PSP FW in the hive after
a GPU reset.
Split the function into building the topology SW state and updating each PSP FW in the hive.
Save topology and count of XGMI devices for reuse.

v2: Create separate header for XGMI.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h [new file with mode: 0644]

index 78fe21b88a000a5c67ec4d62c97df159bc2867ec..5163798f636bbf4a6dbadbcf905f5c4aefd0fd5f 100644 (file)
@@ -1097,12 +1097,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg);
 
-
-/*
- * functions used by amdgpu_xgmi.c
- */
-int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
-
 /*
  * functions used by amdgpu_encoder.c
  */
index fdbc2c229bc608e024c3fe2406d17142a5c5f10f..5a95cea58d46b2f1830106f930d256a6a5e30b4a 100644 (file)
@@ -59,6 +59,8 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_pm.h"
 
+#include "amdgpu_xgmi.h"
+
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
index 909216a9b447ba799e871bc4e5deec4f4f37b336..59e667a8f01bedc1f3299b1fc4a130b3196ee9c2 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/list.h>
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
+#include "amdgpu_xgmi.h"
 
 
 static DEFINE_MUTEX(xgmi_mutex);
@@ -34,12 +35,14 @@ static DEFINE_MUTEX(xgmi_mutex);
 struct amdgpu_hive_info {
        uint64_t                hive_id;
        struct list_head        device_list;
+       struct psp_xgmi_topology_info   topology_info; /* node list built in amdgpu_xgmi_add_device(), passed to each PSP */
+       int number_devices; /* number of nodes[] entries filled in topology_info */
 };
 
 static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
 static unsigned hive_count = 0;
 
-static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 {
        int i;
        struct amdgpu_hive_info *tmp;
@@ -61,12 +64,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
        return tmp;
 }
 
+/*
+ * Push the topology cached in @hive (number_devices entries of topology_info)
+ * to @adev's PSP and log the outcome. Returns 0 on success, otherwise the
+ * error code returned by psp_xgmi_set_topology_info().
+ */
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
+{
+       int ret = psp_xgmi_set_topology_info(&adev->psp, hive->number_devices,
+                                            &hive->topology_info);
+
+       if (ret)
+               dev_err(adev->dev,
+                       "XGMI: Set topology failure on device %llx, hive %llx, ret %d\n",
+                       adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret);
+       else
+               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
+                        adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
+
+       return ret;
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-       struct psp_xgmi_topology_info *tmp_topology;
+       struct psp_xgmi_topology_info *hive_topology;
        struct amdgpu_hive_info *hive;
        struct amdgpu_xgmi      *entry;
-       struct amdgpu_device    *tmp_adev;
+       struct amdgpu_device *tmp_adev = NULL;
 
        int count = 0, ret = -EINVAL;
 
@@ -76,21 +100,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
        adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
        adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
 
-       tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
-       if (!tmp_topology)
-               return -ENOMEM;
        mutex_lock(&xgmi_mutex);
        hive = amdgpu_get_xgmi_hive(adev);
        if (!hive)
                goto exit;
 
+       hive_topology = &hive->topology_info;
+
        list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
        list_for_each_entry(entry, &hive->device_list, head)
-               tmp_topology->nodes[count++].node_id = entry->node_id;
+               hive_topology->nodes[count++].node_id = entry->node_id;
+       hive->number_devices = count;
 
        /* Each psp need to get the latest topology */
        list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
+               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
                if (ret) {
                        dev_err(tmp_adev->dev,
                                "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
@@ -101,25 +125,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                }
        }
 
-       /* Each psp need to set the latest topology */
        list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-               ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
-               if (ret) {
-                       dev_err(tmp_adev->dev,
-                               "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
-                               tmp_adev->gmc.xgmi.node_id,
-                               tmp_adev->gmc.xgmi.hive_id, ret);
-                       /* To do : continue with some  node failed or disable the  whole  hive */
+               ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+               if (ret)
                        break;
-               }
        }
-       if (!ret)
-               dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
-                       adev->gmc.xgmi.physical_node_id,
-                       adev->gmc.xgmi.hive_id);
 
 exit:
        mutex_unlock(&xgmi_mutex);
-       kfree(tmp_topology);
        return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
new file mode 100644 (file)
index 0000000..85a7263
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_XGMI_H__
+#define __AMDGPU_XGMI_H__
+
+/*
+ * functions implemented in amdgpu_xgmi.c and exported to other files
+ */
+
+struct amdgpu_device;
+struct amdgpu_hive_info;
+
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
+int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+
+#endif