From: Michel Dänzer Date: Thu, 31 Jul 2014 09:43:49 +0000 (+0900) Subject: drm/radeon: Always flush the HDP cache before submitting a CS to the GPU X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=72a9987edcedb89db988079a03c9b9c65b6ec9ac;p=openwrt%2Fstaging%2Fblogic.git drm/radeon: Always flush the HDP cache before submitting a CS to the GPU This ensures the GPU sees all previous CPU writes to VRAM, which makes it safe: * For userspace to stream data from CPU to GPU via VRAM instead of GTT * For IBs to be stored in VRAM instead of GTT * For ring buffers to be stored in VRAM instead of GTT, if the HPD flush is performed via MMIO Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 60c276538ba9..afdfe04a9290 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3890,8 +3890,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); radeon_ring_write(ring, fence->seq); radeon_ring_write(ring, 0); - /* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, fence->ring); } /** @@ -3920,8 +3918,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(addr)); radeon_ring_write(ring, fence->seq); radeon_ring_write(ring, 0); - /* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, fence->ring); } bool cik_semaphore_ring_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 5fd242795178..04b5940b8923 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev, /* Wait until IDLE & CLEAN */ radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN); - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl | - RADEON_HDP_READ_BUFFER_INVALIDATE); - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl); + r100_ring_hdp_flush(rdev, ring); /* Emit fence sequence & fire IRQ */ radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0)); radeon_ring_write(ring, fence->seq); @@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, (void)RREG32(RADEON_CP_RB_WPTR); } +/** + * r100_ring_hdp_flush - flush Host Data Path via the ring buffer + * rdev: radeon device structure + * ring: ring buffer struct for emitting packets + */ +void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) +{ + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl | + RADEON_HDP_READ_BUFFER_INVALIDATE); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl); +} + static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bcc98c1c135e..29731584b6ae 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1749,6 +1749,7 @@ struct radeon_asic_ring { /* command emmit functions */ void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); + void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 5781fde5c1ce..c49a01f92b4d 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, + .hdp_flush = &r100_ring_hdp_flush, }; static struct radeon_asic r100_asic = { @@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, + .hdp_flush = &r100_ring_hdp_flush, }; static struct radeon_asic r300_asic = { @@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = { .resume = &cik_resume, .asic_reset = &cik_asic_reset, .vga_set_state = &r600_vga_set_state, - .mmio_hdp_flush = NULL, + .mmio_hdp_flush = &r600_mmio_hdp_flush, .gui_idle = &r600_gui_idle, .mc_wait_for_idle = &evergreen_mc_wait_for_idle, .get_xclk = &cik_get_xclk, @@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = { .resume = &cik_resume, .asic_reset = &cik_asic_reset, .vga_set_state = &r600_vga_set_state, - .mmio_hdp_flush = NULL, + .mmio_hdp_flush = &r600_mmio_hdp_flush, .gui_idle = &r600_gui_idle, .mc_wait_for_idle = &evergreen_mc_wait_for_idle, .get_xclk = &cik_get_xclk, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b8826c655685..3cf6be6666fc 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); - +void r100_ring_hdp_flush(struct radeon_device *rdev, + struct radeon_ring *ring); /* * r200,rv250,rs300,rv280 */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index e9e361084249..54aa293a8175 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -82,9 +82,11 @@ * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN), * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG * 2.39.0 - Add INFO query for number of active CUs + * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting + * CS to GPU */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 39 +#define KMS_DRIVER_MINOR 40 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 9c86ac947275..5b4e0cf231a0 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -183,11 +183,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig */ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) { + /* If we are emitting the HDP flush via the ring buffer, we need to + * do it before padding. + */ + if (rdev->asic->ring[ring->idx]->hdp_flush) + rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); /* We pad to match fetch size */ while (ring->wptr & ring->align_mask) { radeon_ring_write(ring, ring->nop); } mb(); + /* If we are emitting the HDP flush via MMIO, we need to do it after + * all CPU writes to VRAM finished. + */ + if (rdev->asic->mmio_hdp_flush) + rdev->asic->mmio_hdp_flush(rdev); radeon_ring_set_wptr(rdev, ring); }