drm/msm/adreno: Add a5xx specific registers for the GPU state
author Jordan Crouse <jcrouse@codeaurora.org>
Tue, 24 Jul 2018 16:33:30 +0000 (10:33 -0600)
committer Rob Clark <robdclark@gmail.com>
Mon, 30 Jul 2018 12:50:06 +0000 (08:50 -0400)
HLSQ, SP and TP registers are only accessible from a special
aperture and to make matters worse the aperture is blocked from
the CPU on targets that can support secure rendering. Luckily the
GPU hardware has its own purpose built register dumper that can
access the registers from the aperture. Add a5xx specific code
to program the crashdumper and retrieve the wayward registers
and dump them for the crash state.

Also, remove a block of registers from the regular CPU accessible
list that aren't useful for debug, which helps reduce the size
of the crash state file by a goodly amount.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Documentation/gpu/msm-crash-dump.rst
drivers/gpu/drm/msm/adreno/a3xx_gpu.c
drivers/gpu/drm/msm/adreno/a4xx_gpu.c
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
drivers/gpu/drm/msm/adreno/adreno_gpu.c
drivers/gpu/drm/msm/adreno/adreno_gpu.h

index 35e87004e006e66b110fb9a2ae345cd76d52e1f2..7943f43f70d6c47a93cff9cb727ead0fd5132522 100644 (file)
@@ -76,3 +76,7 @@ registers
 
        value
                Hexadecimal value of the register.
+
+registers-hlsq
+               (5xx only) Register values from the HLSQ aperture.
+               Same format as the register section.
index fc502e41213289dedfc5d00fbc0c11c18f40e0e4..669c2d4b070dea4c711e0e8df9585d68dd810cd9 100644 (file)
@@ -421,10 +421,12 @@ static void a3xx_dump(struct msm_gpu *gpu)
 
 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
 {
-       struct msm_gpu_state *state = adreno_gpu_state_get(gpu);
+       struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
-       if (IS_ERR(state))
-               return state;
+       if (!state)
+               return ERR_PTR(-ENOMEM);
+
+       adreno_gpu_state_get(gpu, state);
 
        state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
 
index 8129cf037db194c025c76d96b3d95af090352fb2..7c4e6dc1ed59961e0df2f14fcbc9805c5a2cc359 100644 (file)
@@ -457,10 +457,12 @@ static const unsigned int a4xx_registers[] = {
 
 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
 {
-       struct msm_gpu_state *state = adreno_gpu_state_get(gpu);
+       struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
-       if (IS_ERR(state))
-               return state;
+       if (!state)
+               return ERR_PTR(-ENOMEM);
+
+       adreno_gpu_state_get(gpu, state);
 
        state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
 
index 16074fa6bf1ef3878171762b06bcc73454b144cf..bd84f71d27d8567243956d9c1ddc1d9e6a09d458 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/soc/qcom/mdt_loader.h>
 #include <linux/pm_opp.h>
 #include <linux/nvmem-consumer.h>
+#include <linux/iopoll.h>
 #include "msm_gem.h"
 #include "msm_mmu.h"
 #include "a5xx_gpu.h"
@@ -1123,8 +1124,9 @@ static const u32 a5xx_registers[] = {
        0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
        0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
        0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
-       0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
-       0xB9A0, 0xB9BF, ~0
+       0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
+       0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
+       0xAC60, 0xAC60, ~0,
 };
 
 static void a5xx_dump(struct msm_gpu *gpu)
@@ -1195,25 +1197,233 @@ static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
        return 0;
 }
 
+/* Scratch buffer shared between the CPU and the GPU crashdumper */
+struct a5xx_crashdumper {
+       /* CPU mapping of the buffer, or an ERR_PTR if creation failed */
+       void *ptr;
+       /* GEM object backing the buffer */
+       struct drm_gem_object *bo;
+       /* Address of the buffer as programmed into the GPU */
+       u64 iova;
+};
+
+/* a5xx flavour of the GPU crash state: generic state plus HLSQ registers */
+struct a5xx_gpu_state {
+       /* Generic state; container_of() on this member recovers the wrapper */
+       struct msm_gpu_state base;
+       /* Values captured through the HLSQ aperture, or NULL if unavailable */
+       u32 *hlsqregs;
+};
+
+/*
+ * Poll a GPU register with readl_poll_timeout(). The register is located at
+ * (gpu)->mmio + ((addr) << 2); val, cond, interval and timeout are handed to
+ * readl_poll_timeout() unchanged.
+ */
+#define gpu_poll_timeout(gpu, addr, val, cond, interval, timeout) \
+       readl_poll_timeout((gpu)->mmio + ((addr) << 2), val, cond, \
+               interval, timeout)
+
+/*
+ * Create the 1MB uncached buffer the crashdumper works in and record its CPU
+ * pointer, GEM object and GPU address in @dumper.
+ *
+ * Returns 0 on success or the error carried by the pointer that
+ * msm_gem_kernel_new_locked() handed back.
+ */
+static int a5xx_crashdumper_init(struct msm_gpu *gpu,
+               struct a5xx_crashdumper *dumper)
+{
+       void *vaddr = msm_gem_kernel_new_locked(gpu->dev,
+               SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
+               &dumper->bo, &dumper->iova);
+
+       /* Stored even on failure; a5xx_crashdumper_run() rejects bad pointers */
+       dumper->ptr = vaddr;
+
+       return IS_ERR(vaddr) ? PTR_ERR(vaddr) : 0;
+}
+
+/*
+ * Undo a5xx_crashdumper_init(): calls msm_gem_put_iova() and
+ * msm_gem_put_vaddr() on the buffer object and then drops the reference
+ * held on it.
+ *
+ * NOTE(review): dumper->bo is used unconditionally, so this presumably must
+ * not be called when a5xx_crashdumper_init() failed - confirm.
+ */
+static void a5xx_crashdumper_free(struct msm_gpu *gpu,
+               struct a5xx_crashdumper *dumper)
+{
+       msm_gem_put_iova(dumper->bo, gpu->aspace);
+       msm_gem_put_vaddr(dumper->bo);
+
+       drm_gem_object_unreference(dumper->bo);
+}
+
+/*
+ * Hand the script in the dumper buffer to the GPU and wait for it to finish.
+ *
+ * Returns -EINVAL if the dumper has no usable buffer, otherwise the result of
+ * polling REG_A5XX_CP_CRASH_DUMP_CNTL for bit 0x04 (presumably the "done"
+ * flag - confirm) with a 100us poll interval and a 10000us timeout.
+ */
+static int a5xx_crashdumper_run(struct msm_gpu *gpu,
+               struct a5xx_crashdumper *dumper)
+{
+       u32 val;
+
+       /* Covers both a zero-initialized dumper and a failed init */
+       if (IS_ERR_OR_NULL(dumper->ptr))
+               return -EINVAL;
+
+       /* The script lives at the start of the buffer, i.e. at dumper->iova */
+       gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
+               REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+
+       gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
+
+       return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
+               val & 0x04, 100, 10000);
+}
+
+/*
+ * This is a list of the register banks that need to be read through the HLSQ
+ * aperture by the crashdumper.  These are not nominally accessible from
+ * the CPU on a secure platform.
+ *
+ * type:      bank selector; shifted left by 8 and written to
+ *            REG_A5XX_HLSQ_DBG_READ_SEL by the crashdump script
+ * regoffset: offset of the first register in the bank, used when the values
+ *            are printed
+ * count:     number of consecutive registers captured for the bank
+ */
+static const struct {
+       u32 type;
+       u32 regoffset;
+       u32 count;
+} a5xx_hlsq_aperture_regs[] = {
+       { 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
+       { 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
+       { 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
+       { 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
+       { 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
+       { 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
+       { 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
+       { 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
+       { 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
+       { 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
+       { 0x3a, 0x0f00, 0x1c },  /* TP non-context */
+       { 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
+       { 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
+       { 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
+       { 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
+};
+
+/*
+ * Capture the registers that sit behind the HLSQ aperture.
+ *
+ * Builds a crashdump script at the start of a scratch buffer, asks the GPU to
+ * run it and copies the captured values into a5xx_state->hlsqregs. This is
+ * best effort: if the values cannot be captured the function returns quietly
+ * and never leaves a5xx_state->hlsqregs pointing at freed memory, so
+ * a5xx_show() and a5xx_gpu_state_destroy() can safely test or free it.
+ */
+static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
+               struct a5xx_gpu_state *a5xx_state)
+{
+       struct a5xx_crashdumper dumper = { 0 };
+       u32 count = 0;
+       u64 offset;
+       u64 *ptr;
+       int i;
+
+       if (a5xx_crashdumper_init(gpu, &dumper))
+               return;
+
+       /* The script will be written at offset 0 */
+       ptr = dumper.ptr;
+
+       /*
+        * Start writing the data at offset 256k. The address is kept 64 bits
+        * wide so the upper half of dumper.iova is never truncated.
+        */
+       offset = dumper.iova + (256 * SZ_1K);
+
+       /* Count how many additional registers to get from the HLSQ aperture */
+       for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
+               count += a5xx_hlsq_aperture_regs[i].count;
+
+       a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
+       if (!a5xx_state->hlsqregs)
+               goto out;
+
+       /* Build the crashdump script */
+       for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
+               u32 type = a5xx_hlsq_aperture_regs[i].type;
+               u32 c = a5xx_hlsq_aperture_regs[i].count;
+
+               /* Write the register to select the desired bank */
+               *ptr++ = ((u64) type << 8);
+               *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
+                       (1 << 21) | 1;
+
+               /* Then have c values from the aperture stored at offset */
+               *ptr++ = offset;
+               *ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
+                       | c;
+
+               offset += c * sizeof(u32);
+       }
+
+       /* Write two zeros to close off the script */
+       *ptr++ = 0;
+       *ptr++ = 0;
+
+       if (a5xx_crashdumper_run(gpu, &dumper)) {
+               /*
+                * Clear the pointer after freeing it: the state outlives this
+                * function and both a5xx_show() and a5xx_gpu_state_destroy()
+                * would otherwise touch freed memory.
+                */
+               kfree(a5xx_state->hlsqregs);
+               a5xx_state->hlsqregs = NULL;
+               goto out;
+       }
+
+       /* Copy the data from the crashdumper to the state */
+       memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
+               count * sizeof(u32));
+
+out:
+       /* Every path that got past init must release the scratch buffer */
+       a5xx_crashdumper_free(gpu, &dumper);
+}
+
 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
 {
-       struct msm_gpu_state *state;
+       struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
+                       GFP_KERNEL);
 
-       /*
-        * Temporarily disable hardware clock gating before going into
-        * adreno_show to avoid issues while reading the registers
-        */
+       if (!a5xx_state)
+               return ERR_PTR(-ENOMEM);
+
+       /* Temporarily disable hardware clock gating before reading the hw */
        a5xx_set_hwcg(gpu, false);
 
-       state = adreno_gpu_state_get(gpu);
+       /* First get the generic state from the adreno core */
+       adreno_gpu_state_get(gpu, &(a5xx_state->base));
+
+       a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
 
-       if (!IS_ERR(state))
-               state->rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
+       /* Get the HLSQ regs with the help of the crashdumper */
+       a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
 
        a5xx_set_hwcg(gpu, true);
 
-       return state;
+       return &a5xx_state->base;
+}
+
+/*
+ * kref release callback for an a5xx GPU state: frees the HLSQ register
+ * array, lets the adreno core free what it allocated inside the generic
+ * state and finally frees the a5xx wrapper itself.
+ */
+static void a5xx_gpu_state_destroy(struct kref *kref)
+{
+       /* The kref is embedded in the generic state, which is embedded in ours */
+       struct a5xx_gpu_state *a5xx_state = container_of(kref,
+               struct a5xx_gpu_state, base.ref);
+
+       kfree(a5xx_state->hlsqregs);
+
+       adreno_gpu_state_destroy(&a5xx_state->base);
+       kfree(a5xx_state);
+}
+
+/*
+ * Drop a reference on an a5xx GPU state.
+ *
+ * Returns 1 without touching anything when @state is NULL or an error
+ * pointer, otherwise whatever kref_put() returns.
+ */
+int a5xx_gpu_state_put(struct msm_gpu_state *state)
+{
+       if (!IS_ERR_OR_NULL(state))
+               return kref_put(&state->ref, a5xx_gpu_state_destroy);
+
+       return 1;
+}
+
+
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
+/*
+ * Print a captured a5xx GPU state: the generic adreno state first, followed
+ * by a "registers-hlsq" section when HLSQ aperture values were captured.
+ * Does nothing when @state is NULL or an error pointer.
+ */
+void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
+               struct drm_printer *p)
+{
+       struct a5xx_gpu_state *a5xx_state;
+       const u32 *vals;
+       u32 idx = 0;
+       int i, j;
+
+       if (IS_ERR_OR_NULL(state))
+               return;
+
+       adreno_show(gpu, state, p);
+
+       /* Dump the additional a5xx HLSQ registers */
+       a5xx_state = container_of(state, struct a5xx_gpu_state, base);
+       vals = a5xx_state->hlsqregs;
+       if (!vals)
+               return;
+
+       drm_printf(p, "registers-hlsq:\n");
+
+       /* The values are stored bank after bank, in table order */
+       for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
+               u32 first = a5xx_hlsq_aperture_regs[i].regoffset;
+               u32 nregs = a5xx_hlsq_aperture_regs[i].count;
+
+               for (j = 0; j < nregs; j++) {
+                       u32 value = vals[idx++];
+
+                       /*
+                        * The whole range of each bank is captured to keep
+                        * the script simple, even though some registers in
+                        * it are not valid. Those read back as 0xdeadbeef
+                        * and are left out of the output.
+                        */
+                       if (value != 0xdeadbeef)
+                               drm_printf(p,
+                                       "  - { offset: 0x%04x, value: 0x%08x }\n",
+                                       (first + j) << 2, value);
+               }
+       }
+}
+#endif
 
 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
 {
@@ -1244,14 +1454,14 @@ static const struct adreno_gpu_funcs funcs = {
                .irq = a5xx_irq,
                .destroy = a5xx_destroy,
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
-               .show = adreno_show,
+               .show = a5xx_show,
 #endif
 #if defined(CONFIG_DEBUG_FS)
                .debugfs_init = a5xx_debugfs_init,
 #endif
                .gpu_busy = a5xx_gpu_busy,
                .gpu_state_get = a5xx_gpu_state_get,
-               .gpu_state_put = adreno_gpu_state_put,
+               .gpu_state_put = a5xx_gpu_state_put,
        },
        .get_timestamp = a5xx_get_timestamp,
 };
index 808d4fc9c4a15120106dbede461587b105494016..cd418dab7a623204f6eb4c2cb70af80d3d7a93ca 100644 (file)
@@ -369,16 +369,11 @@ bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
        return false;
 }
 
-struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu)
+int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-       struct msm_gpu_state *state;
        int i, count = 0;
 
-       state = kzalloc(sizeof(*state), GFP_KERNEL);
-       if (!state)
-               return ERR_PTR(-ENOMEM);
-
        kref_init(&state->ref);
 
        do_gettimeofday(&state->time);
@@ -432,14 +427,12 @@ struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu)
                state->nr_registers = count;
        }
 
-       return state;
+       return 0;
 }
 
-static void adreno_gpu_state_destroy(struct kref *kref)
+void adreno_gpu_state_destroy(struct msm_gpu_state *state)
 {
        int i;
-       struct msm_gpu_state *state = container_of(kref,
-               struct msm_gpu_state, ref);
 
        for (i = 0; i < ARRAY_SIZE(state->ring); i++)
                kfree(state->ring[i].data);
@@ -447,6 +440,14 @@ static void adreno_gpu_state_destroy(struct kref *kref)
        kfree(state->comm);
        kfree(state->cmd);
        kfree(state->registers);
+}
+
+static void adreno_gpu_state_kref_destroy(struct kref *kref)
+{
+       struct msm_gpu_state *state = container_of(kref,
+               struct msm_gpu_state, ref);
+
+       adreno_gpu_state_destroy(state);
        kfree(state);
 }
 
@@ -455,7 +456,7 @@ int adreno_gpu_state_put(struct msm_gpu_state *state)
        if (IS_ERR_OR_NULL(state))
                return 1;
 
-       return kref_put(&state->ref, adreno_gpu_state_destroy);
+       return kref_put(&state->ref, adreno_gpu_state_kref_destroy);
 }
 
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
index 4a868aaf1a705cfe0885d7c512e7f901bc4eb235..4406776597fdb126ce161555c662cc2a7d9f0ad5 100644 (file)
@@ -230,7 +230,9 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 void adreno_gpu_cleanup(struct adreno_gpu *gpu);
 
 
-struct msm_gpu_state *adreno_gpu_state_get(struct msm_gpu *gpu);
+void adreno_gpu_state_destroy(struct msm_gpu_state *state);
+
+int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state);
 int adreno_gpu_state_put(struct msm_gpu_state *state);
 
 /* ringbuffer helpers (the parts that are adreno specific) */