From b82175750131c9ac228cf08116a208c856d850c9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Fri, 14 Dec 2018 15:31:24 +0100 Subject: [PATCH] drm/amdgpu: fix IH overflow on Vega10 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When an ring buffer overflow happens the appropriate bit is set in the WPTR register which is also written back to memory. But clearing the bit in the WPTR doesn't trigger another memory writeback. So what can happen is that we end up processing the buffer overflow over and over again because the bit is never cleared. Resulting in a random system lockup because of an infinite loop in an interrupt handler. This is 100% reproducible on Vega10, but it's most likely an issue we have in the driver over all generations all the way back to radeon. v2: rebase Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 41 ++++++++++++++++---------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 3e9ebb0de94d..562701939d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -192,22 +192,31 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 32). Hopefully - * this should allow us to catchup. - */ - tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); - ih->rptr = tmp; - - tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR)); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catchup. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); + +out: return (wptr & ih->ptr_mask); } -- 2.30.2