/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "nid.h"
294cd92098Szrj
/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine.  The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things.  It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */
424cd92098Szrj
434cd92098Szrj /**
44c6f73aabSFrançois Tigeot * cayman_dma_get_rptr - get the current read pointer
45c6f73aabSFrançois Tigeot *
46c6f73aabSFrançois Tigeot * @rdev: radeon_device pointer
47c6f73aabSFrançois Tigeot * @ring: radeon ring pointer
48c6f73aabSFrançois Tigeot *
49c6f73aabSFrançois Tigeot * Get the current rptr from the hardware (cayman+).
50c6f73aabSFrançois Tigeot */
cayman_dma_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)51c6f73aabSFrançois Tigeot uint32_t cayman_dma_get_rptr(struct radeon_device *rdev,
52c6f73aabSFrançois Tigeot struct radeon_ring *ring)
53c6f73aabSFrançois Tigeot {
54c6f73aabSFrançois Tigeot u32 rptr, reg;
55c6f73aabSFrançois Tigeot
56c6f73aabSFrançois Tigeot if (rdev->wb.enabled) {
57c6f73aabSFrançois Tigeot rptr = rdev->wb.wb[ring->rptr_offs/4];
58c6f73aabSFrançois Tigeot } else {
59c6f73aabSFrançois Tigeot if (ring->idx == R600_RING_TYPE_DMA_INDEX)
60c6f73aabSFrançois Tigeot reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET;
61c6f73aabSFrançois Tigeot else
62c6f73aabSFrançois Tigeot reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET;
63c6f73aabSFrançois Tigeot
64c6f73aabSFrançois Tigeot rptr = RREG32(reg);
65c6f73aabSFrançois Tigeot }
66c6f73aabSFrançois Tigeot
67c6f73aabSFrançois Tigeot return (rptr & 0x3fffc) >> 2;
68c6f73aabSFrançois Tigeot }
69c6f73aabSFrançois Tigeot
70c6f73aabSFrançois Tigeot /**
71c6f73aabSFrançois Tigeot * cayman_dma_get_wptr - get the current write pointer
72c6f73aabSFrançois Tigeot *
73c6f73aabSFrançois Tigeot * @rdev: radeon_device pointer
74c6f73aabSFrançois Tigeot * @ring: radeon ring pointer
75c6f73aabSFrançois Tigeot *
76c6f73aabSFrançois Tigeot * Get the current wptr from the hardware (cayman+).
77c6f73aabSFrançois Tigeot */
cayman_dma_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)78c6f73aabSFrançois Tigeot uint32_t cayman_dma_get_wptr(struct radeon_device *rdev,
79c6f73aabSFrançois Tigeot struct radeon_ring *ring)
80c6f73aabSFrançois Tigeot {
81c6f73aabSFrançois Tigeot u32 reg;
82c6f73aabSFrançois Tigeot
83c6f73aabSFrançois Tigeot if (ring->idx == R600_RING_TYPE_DMA_INDEX)
84c6f73aabSFrançois Tigeot reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
85c6f73aabSFrançois Tigeot else
86c6f73aabSFrançois Tigeot reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
87c6f73aabSFrançois Tigeot
88c6f73aabSFrançois Tigeot return (RREG32(reg) & 0x3fffc) >> 2;
89c6f73aabSFrançois Tigeot }
90c6f73aabSFrançois Tigeot
91c6f73aabSFrançois Tigeot /**
92c6f73aabSFrançois Tigeot * cayman_dma_set_wptr - commit the write pointer
93c6f73aabSFrançois Tigeot *
94c6f73aabSFrançois Tigeot * @rdev: radeon_device pointer
95c6f73aabSFrançois Tigeot * @ring: radeon ring pointer
96c6f73aabSFrançois Tigeot *
97c6f73aabSFrançois Tigeot * Write the wptr back to the hardware (cayman+).
98c6f73aabSFrançois Tigeot */
cayman_dma_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)99c6f73aabSFrançois Tigeot void cayman_dma_set_wptr(struct radeon_device *rdev,
100c6f73aabSFrançois Tigeot struct radeon_ring *ring)
101c6f73aabSFrançois Tigeot {
102c6f73aabSFrançois Tigeot u32 reg;
103c6f73aabSFrançois Tigeot
104c6f73aabSFrançois Tigeot if (ring->idx == R600_RING_TYPE_DMA_INDEX)
105c6f73aabSFrançois Tigeot reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
106c6f73aabSFrançois Tigeot else
107c6f73aabSFrançois Tigeot reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
108c6f73aabSFrançois Tigeot
109c6f73aabSFrançois Tigeot WREG32(reg, (ring->wptr << 2) & 0x3fffc);
110c6f73aabSFrançois Tigeot }
111c6f73aabSFrançois Tigeot
112c6f73aabSFrançois Tigeot /**
1134cd92098Szrj * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1144cd92098Szrj *
1154cd92098Szrj * @rdev: radeon_device pointer
1164cd92098Szrj * @ib: IB object to schedule
1174cd92098Szrj *
1184cd92098Szrj * Schedule an IB in the DMA ring (cayman-SI).
1194cd92098Szrj */
cayman_dma_ring_ib_execute(struct radeon_device * rdev,struct radeon_ib * ib)1204cd92098Szrj void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1214cd92098Szrj struct radeon_ib *ib)
1224cd92098Szrj {
1234cd92098Szrj struct radeon_ring *ring = &rdev->ring[ib->ring];
124*7dcf36dcSFrançois Tigeot unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
1254cd92098Szrj
1264cd92098Szrj if (rdev->wb.enabled) {
1274cd92098Szrj u32 next_rptr = ring->wptr + 4;
1284cd92098Szrj while ((next_rptr & 7) != 5)
1294cd92098Szrj next_rptr++;
1304cd92098Szrj next_rptr += 3;
1314cd92098Szrj radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1324cd92098Szrj radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1334cd92098Szrj radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1344cd92098Szrj radeon_ring_write(ring, next_rptr);
1354cd92098Szrj }
1364cd92098Szrj
1374cd92098Szrj /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1384cd92098Szrj * Pad as necessary with NOPs.
1394cd92098Szrj */
1404cd92098Szrj while ((ring->wptr & 7) != 5)
1414cd92098Szrj radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
142*7dcf36dcSFrançois Tigeot radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
1434cd92098Szrj radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1444cd92098Szrj radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1454cd92098Szrj
1464cd92098Szrj }
1474cd92098Szrj
1484cd92098Szrj /**
1494cd92098Szrj * cayman_dma_stop - stop the async dma engines
1504cd92098Szrj *
1514cd92098Szrj * @rdev: radeon_device pointer
1524cd92098Szrj *
1534cd92098Szrj * Stop the async dma engines (cayman-SI).
1544cd92098Szrj */
cayman_dma_stop(struct radeon_device * rdev)1554cd92098Szrj void cayman_dma_stop(struct radeon_device *rdev)
1564cd92098Szrj {
1574cd92098Szrj u32 rb_cntl;
1584cd92098Szrj
159c6f73aabSFrançois Tigeot if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
160c6f73aabSFrançois Tigeot (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
1614cd92098Szrj radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1624cd92098Szrj
1634cd92098Szrj /* dma0 */
1644cd92098Szrj rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1654cd92098Szrj rb_cntl &= ~DMA_RB_ENABLE;
1664cd92098Szrj WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1674cd92098Szrj
1684cd92098Szrj /* dma1 */
1694cd92098Szrj rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1704cd92098Szrj rb_cntl &= ~DMA_RB_ENABLE;
1714cd92098Szrj WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1724cd92098Szrj
1734cd92098Szrj rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1744cd92098Szrj rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1754cd92098Szrj }
1764cd92098Szrj
1774cd92098Szrj /**
1784cd92098Szrj * cayman_dma_resume - setup and start the async dma engines
1794cd92098Szrj *
1804cd92098Szrj * @rdev: radeon_device pointer
1814cd92098Szrj *
1824cd92098Szrj * Set up the DMA ring buffers and enable them. (cayman-SI).
1834cd92098Szrj * Returns 0 for success, error for failure.
1844cd92098Szrj */
cayman_dma_resume(struct radeon_device * rdev)1854cd92098Szrj int cayman_dma_resume(struct radeon_device *rdev)
1864cd92098Szrj {
1874cd92098Szrj struct radeon_ring *ring;
1884cd92098Szrj u32 rb_cntl, dma_cntl, ib_cntl;
1894cd92098Szrj u32 rb_bufsz;
1904cd92098Szrj u32 reg_offset, wb_offset;
1914cd92098Szrj int i, r;
1924cd92098Szrj
1934cd92098Szrj for (i = 0; i < 2; i++) {
1944cd92098Szrj if (i == 0) {
1954cd92098Szrj ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1964cd92098Szrj reg_offset = DMA0_REGISTER_OFFSET;
1974cd92098Szrj wb_offset = R600_WB_DMA_RPTR_OFFSET;
1984cd92098Szrj } else {
1994cd92098Szrj ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
2004cd92098Szrj reg_offset = DMA1_REGISTER_OFFSET;
2014cd92098Szrj wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
2024cd92098Szrj }
2034cd92098Szrj
2044cd92098Szrj WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
2054cd92098Szrj WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
2064cd92098Szrj
2074cd92098Szrj /* Set ring buffer size in dwords */
2084cd92098Szrj rb_bufsz = order_base_2(ring->ring_size / 4);
2094cd92098Szrj rb_cntl = rb_bufsz << 1;
2104cd92098Szrj #ifdef __BIG_ENDIAN
2114cd92098Szrj rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
2124cd92098Szrj #endif
2134cd92098Szrj WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
2144cd92098Szrj
2154cd92098Szrj /* Initialize the ring buffer's read and write pointers */
2164cd92098Szrj WREG32(DMA_RB_RPTR + reg_offset, 0);
2174cd92098Szrj WREG32(DMA_RB_WPTR + reg_offset, 0);
2184cd92098Szrj
2194cd92098Szrj /* set the wb address whether it's enabled or not */
2204cd92098Szrj WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
2214cd92098Szrj upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
2224cd92098Szrj WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
2234cd92098Szrj ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
2244cd92098Szrj
2254cd92098Szrj if (rdev->wb.enabled)
2264cd92098Szrj rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
2274cd92098Szrj
2284cd92098Szrj WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
2294cd92098Szrj
2304cd92098Szrj /* enable DMA IBs */
2314cd92098Szrj ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
2324cd92098Szrj #ifdef __BIG_ENDIAN
2334cd92098Szrj ib_cntl |= DMA_IB_SWAP_ENABLE;
2344cd92098Szrj #endif
2354cd92098Szrj WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
2364cd92098Szrj
2374cd92098Szrj dma_cntl = RREG32(DMA_CNTL + reg_offset);
2384cd92098Szrj dma_cntl &= ~CTXEMPTY_INT_ENABLE;
2394cd92098Szrj WREG32(DMA_CNTL + reg_offset, dma_cntl);
2404cd92098Szrj
2414cd92098Szrj ring->wptr = 0;
2424cd92098Szrj WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
2434cd92098Szrj
2444cd92098Szrj WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
2454cd92098Szrj
2464cd92098Szrj ring->ready = true;
2474cd92098Szrj
2484cd92098Szrj r = radeon_ring_test(rdev, ring->idx, ring);
2494cd92098Szrj if (r) {
2504cd92098Szrj ring->ready = false;
2514cd92098Szrj return r;
2524cd92098Szrj }
2534cd92098Szrj }
2544cd92098Szrj
255c6f73aabSFrançois Tigeot if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) ||
256c6f73aabSFrançois Tigeot (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX))
2574cd92098Szrj radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2584cd92098Szrj
2594cd92098Szrj return 0;
2604cd92098Szrj }
2614cd92098Szrj
2624cd92098Szrj /**
2634cd92098Szrj * cayman_dma_fini - tear down the async dma engines
2644cd92098Szrj *
2654cd92098Szrj * @rdev: radeon_device pointer
2664cd92098Szrj *
2674cd92098Szrj * Stop the async dma engines and free the rings (cayman-SI).
2684cd92098Szrj */
cayman_dma_fini(struct radeon_device * rdev)2694cd92098Szrj void cayman_dma_fini(struct radeon_device *rdev)
2704cd92098Szrj {
2714cd92098Szrj cayman_dma_stop(rdev);
2724cd92098Szrj radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2734cd92098Szrj radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
2744cd92098Szrj }
2754cd92098Szrj
2764cd92098Szrj /**
2774cd92098Szrj * cayman_dma_is_lockup - Check if the DMA engine is locked up
2784cd92098Szrj *
2794cd92098Szrj * @rdev: radeon_device pointer
2804cd92098Szrj * @ring: radeon_ring structure holding ring information
2814cd92098Szrj *
2824cd92098Szrj * Check if the async DMA engine is locked up.
2834cd92098Szrj * Returns true if the engine appears to be locked up, false if not.
2844cd92098Szrj */
cayman_dma_is_lockup(struct radeon_device * rdev,struct radeon_ring * ring)2854cd92098Szrj bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2864cd92098Szrj {
2874cd92098Szrj u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2884cd92098Szrj u32 mask;
2894cd92098Szrj
2904cd92098Szrj if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2914cd92098Szrj mask = RADEON_RESET_DMA;
2924cd92098Szrj else
2934cd92098Szrj mask = RADEON_RESET_DMA1;
2944cd92098Szrj
2954cd92098Szrj if (!(reset_mask & mask)) {
296c6f73aabSFrançois Tigeot radeon_ring_lockup_update(rdev, ring);
2974cd92098Szrj return false;
2984cd92098Szrj }
2994cd92098Szrj return radeon_ring_test_lockup(rdev, ring);
3004cd92098Szrj }
3014cd92098Szrj
3024cd92098Szrj /**
303c6f73aabSFrançois Tigeot * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART
304c6f73aabSFrançois Tigeot *
305c6f73aabSFrançois Tigeot * @rdev: radeon_device pointer
306c6f73aabSFrançois Tigeot * @ib: indirect buffer to fill with commands
307c6f73aabSFrançois Tigeot * @pe: addr of the page entry
308c6f73aabSFrançois Tigeot * @src: src addr where to copy from
309c6f73aabSFrançois Tigeot * @count: number of page entries to update
310c6f73aabSFrançois Tigeot *
311c6f73aabSFrançois Tigeot * Update PTEs by copying them from the GART using the DMA (cayman/TN).
312c6f73aabSFrançois Tigeot */
cayman_dma_vm_copy_pages(struct radeon_device * rdev,struct radeon_ib * ib,uint64_t pe,uint64_t src,unsigned count)313c6f73aabSFrançois Tigeot void cayman_dma_vm_copy_pages(struct radeon_device *rdev,
314c6f73aabSFrançois Tigeot struct radeon_ib *ib,
315c6f73aabSFrançois Tigeot uint64_t pe, uint64_t src,
316c6f73aabSFrançois Tigeot unsigned count)
317c6f73aabSFrançois Tigeot {
318c6f73aabSFrançois Tigeot unsigned ndw;
319c6f73aabSFrançois Tigeot
320c6f73aabSFrançois Tigeot while (count) {
321c6f73aabSFrançois Tigeot ndw = count * 2;
322c6f73aabSFrançois Tigeot if (ndw > 0xFFFFE)
323c6f73aabSFrançois Tigeot ndw = 0xFFFFE;
324c6f73aabSFrançois Tigeot
325c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
326c6f73aabSFrançois Tigeot 0, 0, ndw);
327c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = lower_32_bits(pe);
328c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = lower_32_bits(src);
329c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
330c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
331c6f73aabSFrançois Tigeot
332c6f73aabSFrançois Tigeot pe += ndw * 4;
333c6f73aabSFrançois Tigeot src += ndw * 4;
334c6f73aabSFrançois Tigeot count -= ndw / 2;
335c6f73aabSFrançois Tigeot }
336c6f73aabSFrançois Tigeot }
337c6f73aabSFrançois Tigeot
338c6f73aabSFrançois Tigeot /**
339c6f73aabSFrançois Tigeot * cayman_dma_vm_write_pages - update PTEs by writing them manually
3404cd92098Szrj *
3414cd92098Szrj * @rdev: radeon_device pointer
3424cd92098Szrj * @ib: indirect buffer to fill with commands
3434cd92098Szrj * @pe: addr of the page entry
3444cd92098Szrj * @addr: dst addr to write into pe
3454cd92098Szrj * @count: number of page entries to update
3464cd92098Szrj * @incr: increase next addr by incr bytes
347c6f73aabSFrançois Tigeot * @flags: hw access flags
3484cd92098Szrj *
349c6f73aabSFrançois Tigeot * Update PTEs by writing them manually using the DMA (cayman/TN).
3504cd92098Szrj */
cayman_dma_vm_write_pages(struct radeon_device * rdev,struct radeon_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint32_t flags)351c6f73aabSFrançois Tigeot void cayman_dma_vm_write_pages(struct radeon_device *rdev,
3524cd92098Szrj struct radeon_ib *ib,
3534cd92098Szrj uint64_t pe,
3544cd92098Szrj uint64_t addr, unsigned count,
3554cd92098Szrj uint32_t incr, uint32_t flags)
3564cd92098Szrj {
3574cd92098Szrj uint64_t value;
3584cd92098Szrj unsigned ndw;
3594cd92098Szrj
3604cd92098Szrj while (count) {
3614cd92098Szrj ndw = count * 2;
3624cd92098Szrj if (ndw > 0xFFFFE)
3634cd92098Szrj ndw = 0xFFFFE;
3644cd92098Szrj
3654cd92098Szrj /* for non-physically contiguous pages (system) */
366c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE,
367c6f73aabSFrançois Tigeot 0, 0, ndw);
3684cd92098Szrj ib->ptr[ib->length_dw++] = pe;
3694cd92098Szrj ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3704cd92098Szrj for (; ndw > 0; ndw -= 2, --count, pe += 8) {
371c6f73aabSFrançois Tigeot if (flags & R600_PTE_SYSTEM) {
3724cd92098Szrj value = radeon_vm_map_gart(rdev, addr);
373c6f73aabSFrançois Tigeot } else if (flags & R600_PTE_VALID) {
3744cd92098Szrj value = addr;
3754cd92098Szrj } else {
3764cd92098Szrj value = 0;
3774cd92098Szrj }
3784cd92098Szrj addr += incr;
379c6f73aabSFrançois Tigeot value |= flags;
3804cd92098Szrj ib->ptr[ib->length_dw++] = value;
3814cd92098Szrj ib->ptr[ib->length_dw++] = upper_32_bits(value);
3824cd92098Szrj }
3834cd92098Szrj }
384c6f73aabSFrançois Tigeot }
385c6f73aabSFrançois Tigeot
386c6f73aabSFrançois Tigeot /**
387c6f73aabSFrançois Tigeot * cayman_dma_vm_set_pages - update the page tables using the DMA
388c6f73aabSFrançois Tigeot *
389c6f73aabSFrançois Tigeot * @rdev: radeon_device pointer
390c6f73aabSFrançois Tigeot * @ib: indirect buffer to fill with commands
391c6f73aabSFrançois Tigeot * @pe: addr of the page entry
392c6f73aabSFrançois Tigeot * @addr: dst addr to write into pe
393c6f73aabSFrançois Tigeot * @count: number of page entries to update
394c6f73aabSFrançois Tigeot * @incr: increase next addr by incr bytes
395c6f73aabSFrançois Tigeot * @flags: hw access flags
396c6f73aabSFrançois Tigeot *
397c6f73aabSFrançois Tigeot * Update the page tables using the DMA (cayman/TN).
398c6f73aabSFrançois Tigeot */
cayman_dma_vm_set_pages(struct radeon_device * rdev,struct radeon_ib * ib,uint64_t pe,uint64_t addr,unsigned count,uint32_t incr,uint32_t flags)399c6f73aabSFrançois Tigeot void cayman_dma_vm_set_pages(struct radeon_device *rdev,
400c6f73aabSFrançois Tigeot struct radeon_ib *ib,
401c6f73aabSFrançois Tigeot uint64_t pe,
402c6f73aabSFrançois Tigeot uint64_t addr, unsigned count,
403c6f73aabSFrançois Tigeot uint32_t incr, uint32_t flags)
404c6f73aabSFrançois Tigeot {
405c6f73aabSFrançois Tigeot uint64_t value;
406c6f73aabSFrançois Tigeot unsigned ndw;
407c6f73aabSFrançois Tigeot
4084cd92098Szrj while (count) {
4094cd92098Szrj ndw = count * 2;
4104cd92098Szrj if (ndw > 0xFFFFE)
4114cd92098Szrj ndw = 0xFFFFE;
4124cd92098Szrj
413c6f73aabSFrançois Tigeot if (flags & R600_PTE_VALID)
4144cd92098Szrj value = addr;
4154cd92098Szrj else
4164cd92098Szrj value = 0;
417c6f73aabSFrançois Tigeot
4184cd92098Szrj /* for physically contiguous pages (vram) */
4194cd92098Szrj ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4204cd92098Szrj ib->ptr[ib->length_dw++] = pe; /* dst addr */
4214cd92098Szrj ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
422c6f73aabSFrançois Tigeot ib->ptr[ib->length_dw++] = flags; /* mask */
4234cd92098Szrj ib->ptr[ib->length_dw++] = 0;
4244cd92098Szrj ib->ptr[ib->length_dw++] = value; /* value */
4254cd92098Szrj ib->ptr[ib->length_dw++] = upper_32_bits(value);
4264cd92098Szrj ib->ptr[ib->length_dw++] = incr; /* increment size */
4274cd92098Szrj ib->ptr[ib->length_dw++] = 0;
428c6f73aabSFrançois Tigeot
4294cd92098Szrj pe += ndw * 4;
4304cd92098Szrj addr += (ndw / 2) * incr;
4314cd92098Szrj count -= ndw / 2;
4324cd92098Szrj }
4334cd92098Szrj }
434c6f73aabSFrançois Tigeot
435c6f73aabSFrançois Tigeot /**
436c6f73aabSFrançois Tigeot * cayman_dma_vm_pad_ib - pad the IB to the required number of dw
437c6f73aabSFrançois Tigeot *
438c6f73aabSFrançois Tigeot * @ib: indirect buffer to fill with padding
439c6f73aabSFrançois Tigeot *
440c6f73aabSFrançois Tigeot */
cayman_dma_vm_pad_ib(struct radeon_ib * ib)441c6f73aabSFrançois Tigeot void cayman_dma_vm_pad_ib(struct radeon_ib *ib)
442c6f73aabSFrançois Tigeot {
4434cd92098Szrj while (ib->length_dw & 0x7)
4444cd92098Szrj ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
4454cd92098Szrj }
4464cd92098Szrj
/**
 * cayman_dma_vm_flush - flush the TLB for a VM via the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 * @vm_id: VM context id (0-7) to flush
 * @pd_addr: page directory base address
 *
 * Update the page table base for @vm_id, flush the HDP cache and
 * invalidate the TLB, then wait for the invalidate to complete
 * (cayman/TN).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
			 unsigned vm_id, uint64_t pd_addr)
{
	/* point the VM context at its page directory */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
	radeon_ring_write(ring, pd_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for invalidate to complete */
	radeon_ring_write(ring, DMA_SRBM_READ_PACKET);
	radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0); /* value */
}
470*7dcf36dcSFrançois Tigeot
471