/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "amdgpu_vm.h"
#include "amdgpu_job.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"

#define AMDGPU_VM_SDMA_MIN_NUM_DW	256u
#define AMDGPU_VM_SDMA_MAX_NUM_DW	(16u * 1024u)

/**
 * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are GTT mapped
 *
 * @table: newly allocated or validated PD/PT
 */
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
	int r;

	r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
	if (r)
		return r;

	if (table->shadow)
		r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);

	return r;
}

/* Allocate a new job for @count PTE updates */
static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
				    unsigned int count)
{
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
		: AMDGPU_IB_POOL_DELAYED;
	struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
		: &p->vm->delayed;
	unsigned int ndw;
	int r;

	/* estimate how many dw we need */
	ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
	if (p->pages_addr)
		ndw += count * 2;
	ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);

	r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
				     ndw * 4, pool, &p->job);
	if (r)
		return r;

	p->num_dw_left = ndw;
	return 0;
}

/**
 * amdgpu_vm_sdma_prepare - prepare SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @resv: reservation object with embedded fence
 * @sync_mode: synchronization mode
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
				  struct dma_resv *resv,
				  enum amdgpu_sync_mode sync_mode)
{
	struct amdgpu_sync sync;
	int r;

	r = amdgpu_vm_sdma_alloc_job(p, 0);
	if (r)
		return r;

	if (!resv)
		return 0;

	amdgpu_sync_create(&sync);
	r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
	if (!r)
		r = amdgpu_sync_push_to_job(&sync, p->job);
	amdgpu_sync_free(&sync);

	if (r) {
		p->num_dw_left = 0;
		amdgpu_job_free(p->job);
	}
	return r;
}

/**
 * amdgpu_vm_sdma_commit - commit SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @fence: resulting fence
 *
 * Returns:
 * Negative errno, 0 for success.
 */
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
				 struct dma_fence **fence)
{
	struct amdgpu_ib *ib = p->job->ibs;
	struct amdgpu_ring *ring;
	struct dma_fence *f;

	ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
			    sched);

	WARN_ON(ib->length_dw == 0);
	amdgpu_ring_pad_ib(ring, ib);
	WARN_ON(ib->length_dw > p->num_dw_left);
	f = amdgpu_job_submit(p->job);

	if (p->unlocked) {
		struct dma_fence *tmp = dma_fence_get(f);

		swap(p->vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	} else {
		dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f,
				   DMA_RESV_USAGE_BOOKKEEP);
	}

	if (fence && !p->immediate) {
		/*
		 * Most hw generations now have a separate queue for page table
		 * updates, but when the queue is shared with userspace we need
		 * the extra CPU round trip to correctly flush the TLB.
		 */
		set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &f->flags);
		swap(*fence, f);
	}
	dma_fence_put(f);
	return 0;
}

/**
 * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @count: number of page entries to copy
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
				     struct amdgpu_bo *bo, uint64_t pe,
				     unsigned count)
{
	struct amdgpu_ib *ib = p->job->ibs;
	uint64_t src = ib->gpu_addr;

	src += p->num_dw_left * 4;

	pe += amdgpu_bo_gpu_offset_no_check(bo);
	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);

	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}

/**
 * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
				    struct amdgpu_bo *bo, uint64_t pe,
				    uint64_t addr, unsigned count,
				    uint32_t incr, uint64_t flags)
{
	struct amdgpu_ib *ib = p->job->ibs;

	pe += amdgpu_bo_gpu_offset_no_check(bo);
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
	if (count < 3) {
		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
				    count, incr);
	} else {
		amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_sdma_update - execute VM update
 *
 * @p: see amdgpu_vm_update_params definition
 * @vmbo: PD/PT to update
 * @pe: byte offset of the PDE/PTE, relative to start of PDB/PTB
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Reserve space in the IB, setup mapping buffer on demand and write commands to
 * the IB.
 */
static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
				 struct amdgpu_bo_vm *vmbo, uint64_t pe,
				 uint64_t addr, unsigned count, uint32_t incr,
				 uint64_t flags)
{
	struct amdgpu_bo *bo = &vmbo->bo;
	struct dma_resv_iter cursor;
	unsigned int i, ndw, nptes;
	struct dma_fence *fence;
	uint64_t *pte;
	int r;

	/* Wait for PD/PT moves to be completed */
	dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&p->job->base, fence);
		if (r) {
			dma_fence_put(fence);
			dma_resv_iter_end(&cursor);
			return r;
		}
	}
	dma_resv_iter_end(&cursor);

	do {
		ndw = p->num_dw_left;
		ndw -= p->job->ibs->length_dw;

		if (ndw < 32) {
			r = amdgpu_vm_sdma_commit(p, NULL);
			if (r)
				return r;

			r = amdgpu_vm_sdma_alloc_job(p, count);
			if (r)
				return r;
		}

		if (!p->pages_addr) {
			/* set page commands needed */
			if (vmbo->shadow)
				amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
							count, incr, flags);
			amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
						incr, flags);
			return 0;
		}

		/* copy commands needed */
		ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
			(vmbo->shadow ? 2 : 1);

		/* for padding */
		ndw -= 7;

		nptes = min(count, ndw / 2);

		/* Put the PTEs at the end of the IB. */
		p->num_dw_left -= nptes * 2;
		pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
		for (i = 0; i < nptes; ++i, addr += incr) {
			pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
			pte[i] |= flags;
		}

		if (vmbo->shadow)
			amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
		amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);

		pe += nptes * 8;
		count -= nptes;
	} while (count);

	return 0;
}

const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
	.map_table = amdgpu_vm_sdma_map_table,
	.prepare = amdgpu_vm_sdma_prepare,
	.update = amdgpu_vm_sdma_update,
	.commit = amdgpu_vm_sdma_commit
};