/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>

#include "radeon.h"
#include "radeon_ucode.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "cik.h"
#include "cikd.h"

/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines. These engines are used for compute
 * and gfx. There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP. sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things. It also has support for tiling/detiling of
 * buffers.
 */
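
/*
 * All sDMA commands below are built with the SDMA_PACKET() helper from
 * cikd.h.  As a rough sketch (see cikd.h for the authoritative layout),
 * the header dword packs the opcode in the low byte, the sub-opcode in
 * the next byte, and packet-specific "extra" bits in the upper half:
 *
 *	header = (extra << 16) | (sub_op << 8) | op;
 *
 * The remaining dwords of each packet are payload (addresses, counts,
 * register offsets, etc.) as emitted by the functions below.
 */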

/**
 * cik_sdma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (CIK+).
 */
uint32_t cik_sdma_get_rptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	u32 rptr, reg;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		if (ring->idx == R600_RING_TYPE_DMA_INDEX)
			reg = SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET;
		else
			reg = SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET;

		rptr = RREG32(reg);
	}

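	/* The hardware rptr is a byte offset into the ring; mask it to the
	 * ring aperture and shift to convert it to a dword index.
	 */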
	return (rptr & 0x3fffc) >> 2;
}

/**
 * cik_sdma_get_wptr - get the current write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current wptr from the hardware (CIK+).
 */
uint32_t cik_sdma_get_wptr(struct radeon_device *rdev,
			   struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
	else
		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;

	return (RREG32(reg) & 0x3fffc) >> 2;
}

/**
 * cik_sdma_set_wptr - commit the write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Write the wptr back to the hardware (CIK+).
 */
void cik_sdma_set_wptr(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	u32 reg;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
	else
		reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;

	WREG32(reg, (ring->wptr << 2) & 0x3fffc);
	(void)RREG32(reg);
}

/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

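	/* The 4 dword INDIRECT_BUFFER packet below has to end on an 8 dword
	 * boundary, so pad with NOPs until wptr is 4 dwords short of it.
	 */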
	/* IB packet must end on an 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
	radeon_ring_write(ring, ib->length_dw);

}

/**
 * cik_sdma_hdp_flush_ring_emit - emit an hdp flush on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void cik_sdma_hdp_flush_ring_emit(struct radeon_device *rdev,
					 int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

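	/* This POLL_REG_MEM packet asks the engine to kick off an HDP flush
	 * and then poll GPU_HDP_FLUSH_DONE until (value & mask) == reference,
	 * i.e. until the flush for this SDMA engine has completed.  This is a
	 * best-effort description; see cikd.h and the CIK register
	 * documentation for the authoritative packet semantics.
	 */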
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* reference */
	radeon_ring_write(ring, ref_and_mask); /* mask */
	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	cik_sdma_hdp_flush_ring_emit(rdev, fence->ring);
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

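	/* The S bit in the header selects signal vs. wait; the semaphore
	 * address is 8 byte aligned, hence the low bits are masked off.
	 */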
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr));

	return true;
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;

	/* FIXME: use something other than the big hammer.  After a few days
	 * we could not find a good combination, so reset the SDMA blocks as
	 * it seems we do not shut them down properly.  This fixes hibernation
	 * and does not affect suspend to RAM.
	 */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	(void)RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	(void)RREG32(SRBM_SOFT_RESET);
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_ctx_switch_enable - enable/disable sdma engine preemption
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable preemption.
 *
 * Enable or disable automatic context switching on the async dma engines (CIK).
 */
static void cik_sdma_ctx_switch_enable(struct radeon_device *rdev, bool enable)
{
	uint32_t reg_offset, value;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		value = RREG32(SDMA0_CNTL + reg_offset);
		if (enable)
			value |= AUTO_CTXSW_ENABLE;
		else
			value &= ~AUTO_CTXSW_ENABLE;
		WREG32(SDMA0_CNTL + reg_offset, value);
	}
}

/**
 * cik_sdma_enable - halt or unhalt the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	if (!enable) {
		cik_sdma_gfx_stop(rdev);
		cik_sdma_rlc_stop(rdev);
	}

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}

	cik_sdma_ctx_switch_enable(rdev, enable);
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

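		/* The ring base is programmed as a 256 byte aligned GPU
		 * address: bits 39:8 go into RB_BASE and the bits above
		 * that into RB_BASE_HI.
		 */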
		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
	    (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

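	/* The load sequence is the same for both firmware layouts: reset
	 * UCODE_ADDR to 0, stream the ucode dwords into UCODE_DATA, write
	 * the ucode version as the final dword, and finally reset
	 * UCODE_ADDR again.  Both SDMA engines are loaded with the same
	 * image; only the register offset differs.
	 */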
	if (rdev->new_fw) {
		const struct sdma_firmware_header_v1_0 *hdr =
			(const struct sdma_firmware_header_v1_0 *)rdev->sdma_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_sdma_hdr(&hdr->header);

		/* sdma0 */
		fw_data = (const __le32 *)
			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, le32_to_cpup(fw_data++));
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

		/* sdma1 */
		fw_data = (const __le32 *)
			(rdev->sdma_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, le32_to_cpup(fw_data++));
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
	} else {
		const __be32 *fw_data;

		/* sdma0 */
		fw_data = (const __be32 *)rdev->sdma_fw->data;
		WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
			WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

		/* sdma1 */
		fw_data = (const __be32 *)rdev->sdma_fw->data;
		WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
		for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
			WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
	}

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
void cik_sdma_fini(struct radeon_device *rdev)
{
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
				  uint64_t src_offset, uint64_t dst_offset,
				  unsigned num_gpu_pages,
				  struct dma_resv *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

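	/* Each COPY_LINEAR packet is 7 dwords and can move at most 0x1fffff
	 * bytes, so the copy is split into num_loops chunks; the extra 14
	 * dwords reserved above cover the sync and fence packets.
	 */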
	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}

/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	unsigned index;
	u32 tmp;
	u64 gpu_addr;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		index = R600_WB_DMA_RING_TEST_OFFSET;
	else
		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;

	gpu_addr = rdev->wb.gpu_addr + index;

	tmp = 0xCAFEDEAD;
	rdev->wb.wb[index/4] = cpu_to_le32(tmp);

	r = radeon_ring_lock(rdev, ring, 5);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
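	/* Emit a 5 dword WRITE_LINEAR packet: header, destination address
	 * lo/hi, the number of data dwords to follow, and the data itself.
	 * If the engine is alive, 0xDEADBEEF shows up at the writeback
	 * location polled below.
	 */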
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, lower_32_bits(gpu_addr));
	radeon_ring_write(ring, upper_32_bits(gpu_addr));
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		index = R600_WB_DMA_RING_TEST_OFFSET;
	else
		index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;

	gpu_addr = rdev->wb.gpu_addr + index;

	tmp = 0xCAFEDEAD;
	rdev->wb.wb[index/4] = cpu_to_le32(tmp);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
		RADEON_USEC_IB_TEST_TIMEOUT));
	if (r < 0) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	} else if (r == 0) {
		DRM_ERROR("radeon: fence wait timed out.\n");
		return -ETIMEDOUT;
	}
	r = 0;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(rdev, ring);
		return false;
	}
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_vm_copy_pages - update PTEs by copying them from the GART
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA (CIK).
 */
void cik_sdma_vm_copy_pages(struct radeon_device *rdev,
			    struct radeon_ib *ib,
			    uint64_t pe, uint64_t src,
			    unsigned count)
{
	while (count) {
		unsigned bytes = count * 8;
		if (bytes > 0x1FFFF8)
			bytes = 0x1FFFF8;

		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
		ib->ptr[ib->length_dw++] = bytes;
		ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
		ib->ptr[ib->length_dw++] = lower_32_bits(src);
		ib->ptr[ib->length_dw++] = upper_32_bits(src);
		ib->ptr[ib->length_dw++] = lower_32_bits(pe);
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);

		pe += bytes;
		src += bytes;
		count -= bytes / 8;
	}
}

/**
 * cik_sdma_vm_write_pages - update PTEs by writing them manually
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update PTEs by writing them manually using sDMA (CIK).
 */
void cik_sdma_vm_write_pages(struct radeon_device *rdev,
			     struct radeon_ib *ib,
			     uint64_t pe,
			     uint64_t addr, unsigned count,
			     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

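		/* Each PTE is 64 bits (two dwords), so a WRITE_LINEAR packet
		 * carrying ndw data dwords covers ndw / 2 page entries, and
		 * ndw is capped at 0xFFFFE dwords per packet.
		 */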
		/* for non-physically contiguous pages (system) */
		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
			SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
		ib->ptr[ib->length_dw++] = pe;
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = ndw;
		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
			if (flags & R600_PTE_SYSTEM) {
				value = radeon_vm_map_gart(rdev, addr);
			} else if (flags & R600_PTE_VALID) {
				value = addr;
			} else {
				value = 0;
			}
			addr += incr;
			value |= flags;
			ib->ptr[ib->length_dw++] = value;
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
		}
	}
}

/**
 * cik_sdma_vm_set_pages - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
void cik_sdma_vm_set_pages(struct radeon_device *rdev,
			   struct radeon_ib *ib,
			   uint64_t pe,
			   uint64_t addr, unsigned count,
			   uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count;
		if (ndw > 0x7FFFF)
			ndw = 0x7FFFF;

		if (flags & R600_PTE_VALID)
			value = addr;
		else
			value = 0;

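		/* GENERATE_PTE_PDE lets the engine expand the entries itself:
		 * roughly, given a destination, a starting value, a mask of
		 * flag bits to OR in, and an increment, it writes ndw
		 * consecutive PTEs, adding incr to the value for each entry.
		 */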
		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = ndw; /* number of entries */

		pe += ndw * 8;
		addr += ndw * incr;
		count -= ndw;
	}
}

/**
 * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
 *
 * @ib: indirect buffer to fill with padding
 *
 */
void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
}

/*
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		      unsigned vm_id, uint64_t pd_addr)
{
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */

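	/* All register updates below go through SRBM_WRITE packets: the
	 * header carries the byte enables (the 0xf000 in the extra bits,
	 * best understood as "write all four bytes"), followed by the
	 * register dword offset and the value to write.
	 */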
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm_id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm_id);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* reference */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}