/*	$NetBSD: amdgpu_vm.c,v 1.3 2018/08/27 14:04:50 riastradh Exp $	*/

/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_vm.c,v 1.3 2018/08/27 14:04:50 riastradh Exp $");

#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/**
 * amdgpu_vm_num_pdes - return the number of page directory entries
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
{
	return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
}

/**
 * amdgpu_vm_directory_size - returns the size of the page directory in bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
}

/**
 * amdgpu_vm_get_bos - add the vm BOs to a validation list
 *
 * @adev: amdgpu_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
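 *
 * The returned array has room for the page directory plus one entry per
 * allocated page table (vm->max_pde_used + 2 entries in total).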
 */
struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
					       struct amdgpu_vm *vm,
					       struct list_head *head)
{
	struct amdgpu_bo_list_entry *list;
	unsigned i, idx;

	list = drm_malloc_ab(vm->max_pde_used + 2,
			     sizeof(struct amdgpu_bo_list_entry));
	if (!list) {
		return NULL;
	}

	/* add the vm page table to the list */
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
	list[0].priority = 0;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tv.shared = true;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
		list[idx].priority = 0;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tv.shared = true;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 *
 * Global mutex must be locked!
 */
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		      struct amdgpu_sync *sync)
{
	struct fence *best[AMDGPU_MAX_RINGS] = {};
	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
	struct amdgpu_device *adev = ring->adev;

	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm_id->id) {
		unsigned id = vm_id->id;
		long owner;

		owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
		if (owner == (long)vm) {
			trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
			return 0;
		}
	}

	/* we definitely need to flush */
	vm_id->pd_gpu_addr = ~0ll;

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < adev->vm_manager.nvm; ++i) {
		struct fence *fence = adev->vm_manager.ids[i].active;
		struct amdgpu_ring *fring;

		if (fence == NULL) {
			/* found a free one */
			vm_id->id = i;
			trace_amdgpu_vm_grab_id(i, ring->idx);
			return 0;
		}

		fring = amdgpu_ring_from_fence(fence);
		if (best[fring->idx] == NULL ||
		    fence_is_later(best[fring->idx], fence)) {
			best[fring->idx] = fence;
			choices[fring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			struct fence *fence;

			fence = adev->vm_manager.ids[choices[i]].active;
			vm_id->id = choices[i];

			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
			return amdgpu_sync_fence(ring->adev, sync, fence);
		}
	}

	/* should never happen */
	BUG();
	return -EINVAL;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @vm: vm we want to flush
 * @updates: last vm update that we waited for
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
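 *
 * A flush is only emitted when either the page directory address or the
 * set of flushed updates for this ring has changed since the last flush.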
 */
void amdgpu_vm_flush(struct amdgpu_ring *ring,
		     struct amdgpu_vm *vm,
		     struct fence *updates)
{
	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
	struct fence *flushed_updates = vm_id->flushed_updates;
	bool is_later;

	if (!flushed_updates)
		is_later = true;
	else if (!updates)
		is_later = false;
	else
		is_later = fence_is_later(updates, flushed_updates);

	if (pd_addr != vm_id->pd_gpu_addr || is_later) {
		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
		if (is_later) {
			vm_id->flushed_updates = fence_get(updates);
			fence_put(flushed_updates);
		}
		vm_id->pd_gpu_addr = pd_addr;
		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
	}
}

/**
 * amdgpu_vm_fence - remember fence for vm
 *
 * @adev: amdgpu_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void amdgpu_vm_fence(struct amdgpu_device *adev,
		     struct amdgpu_vm *vm,
		     struct fence *fence)
{
	struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
	unsigned vm_id = vm->ids[ring->idx].id;

	fence_put(adev->vm_manager.ids[vm_id].active);
	adev->vm_manager.ids[vm_id].active = fence_get(fence);
	atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * amdgpu_vm_update_pages - helper to call the right asic function
 *
 * @adev: amdgpu_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 * @gtt_flags: GTT hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to set up the page table using the DMA.
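 *
 * Three cases are handled: system pages whose flags match the GTT flags
 * are copied straight out of the GART table, other system pages (and very
 * small updates) are written directly, and everything else goes through
 * the set_pte_pde path.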
 */
static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
				   struct amdgpu_ib *ib,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint32_t flags, uint32_t gtt_flags)
{
	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
		uint64_t src = adev->gart.table_addr + (addr >> 12) * 8;
		amdgpu_vm_copy_pte(adev, ib, pe, src, count);

	} else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) {
		amdgpu_vm_write_pte(adev, ib, pe, addr,
				    count, incr, flags);

	} else {
		amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
				      count, incr, flags);
	}
}

int amdgpu_vm_free_job(struct amdgpu_job *job)
{
	int i;
	for (i = 0; i < job->num_ibs; i++)
		amdgpu_ib_free(job->adev, &job->ibs[i]);
	kfree(job->ibs);
	return 0;
}

/**
 * amdgpu_vm_clear_bo - initially clear the page dir/table
 *
 * @adev: amdgpu_device pointer
 * @bo: bo to clear
 *
 * The bo must be reserved before calling this function.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_bo *bo)
{
	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
	struct fence *fence = NULL;
	struct amdgpu_ib *ib;
	unsigned entries;
	uint64_t addr;
	int r;

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = amdgpu_bo_gpu_offset(bo);
	entries = amdgpu_bo_size(bo) / 8;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		goto error;

	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
	if (r)
		goto error_free;

	ib->length_dw = 0;

	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
	amdgpu_vm_pad_ib(adev, ib);
	WARN_ON(ib->length_dw > 64);
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vm_free_job,
						 AMDGPU_FENCE_OWNER_VM,
						 &fence);
	if (!r)
		amdgpu_bo_fence(bo, fence, true);
	fence_put(fence);
	if (amdgpu_enable_scheduler)
		return 0;

error_free:
	amdgpu_ib_free(adev, ib);
	kfree(ib);

error:
	return r;
}

/**
 * amdgpu_vm_map_gart - get the physical address of a gart page
 *
 * @adev: amdgpu_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = adev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}

/**
 * amdgpu_vm_update_page_directory - make sure that the page directory is valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Walks the allocated page tables and updates the page directory
 * entries as needed (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
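 *
 * The IB size is a worst-case estimate: 64 dwords of padding plus up to
 * six dwords for every page directory entry that might need updating.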
 */
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
	struct amdgpu_bo *pd = vm->page_directory;
	uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct amdgpu_ib *ib;
	struct fence *fence = NULL;

	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		return -ENOMEM;

	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
	if (r) {
		kfree(ib);
		return r;
	}
	ib->length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = amdgpu_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				amdgpu_vm_update_pages(adev, ib, last_pde,
						       last_pt, count, incr,
						       AMDGPU_PTE_VALID, 0);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
				       incr, AMDGPU_PTE_VALID, 0);

	if (ib->length_dw != 0) {
		amdgpu_vm_pad_ib(adev, ib);
		amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
		WARN_ON(ib->length_dw > ndw);
		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
							 &amdgpu_vm_free_job,
							 AMDGPU_FENCE_OWNER_VM,
							 &fence);
		if (r)
			goto error_free;

		amdgpu_bo_fence(pd, fence, true);
		fence_put(vm->page_directory_fence);
		vm->page_directory_fence = fence_get(fence);
		fence_put(fence);
	}

	if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
		amdgpu_ib_free(adev, ib);
		kfree(ib);
	}

	return 0;

error_free:
	amdgpu_ib_free(adev, ib);
	kfree(ib);
	return r;
}

/**
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @adev: amdgpu_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 * @gtt_flags: GTT hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
				struct amdgpu_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags,
				uint32_t gtt_flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching.
	 * This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* SI and newer are optimized for 64KB */
	uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
	uint64_t frag_align = 0x80;

#ifdef __NetBSD__		/* XXX ALIGN means something else */
	uint64_t frag_start = round_up(pe_start, frag_align);
#else
	uint64_t frag_start = ALIGN(pe_start, frag_align);
#endif
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are non contiguous */
	if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
		addr += AMDGPU_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	amdgpu_vm_update_pages(adev, ib, frag_start, addr, count,
			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags,
			       gtt_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += AMDGPU_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		amdgpu_vm_update_pages(adev, ib, frag_end, addr, count,
				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
	}
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
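 *
 * @start and @end are GPU page numbers and @dst is a GPU byte address;
 * the pe_start/pe_end values handed to amdgpu_vm_frag_ptes() are the GPU
 * addresses of the PTEs themselves (8 bytes per entry).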
 */
static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_ib *ib,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint32_t flags,
				 uint32_t gtt_flags)
{
	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned count = 0;
	uint64_t addr;

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
		struct amdgpu_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;
		int r;

		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
		r = reservation_object_reserve_shared(pt->tbo.resv);
		if (r)
			return r;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);

		pte = amdgpu_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				amdgpu_vm_frag_ptes(adev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags,
						    gtt_flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
	}

	if (count) {
		amdgpu_vm_frag_ptes(adev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags, gtt_flags);
	}

	return 0;
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @addr: addr to set the area to
 * @gtt_flags: flags as they are used for GTT
 * @fence: optional resulting fence
 *
 * Fill in the page table entries for @mapping.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm,
				       struct amdgpu_bo_va_mapping *mapping,
				       uint64_t addr, uint32_t gtt_flags,
				       struct fence **fence)
{
	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
	unsigned nptes, ncmds, ndw;
	uint32_t flags = gtt_flags;
	struct amdgpu_ib *ib;
	struct fence *f = NULL;
	int r;

	/* Normally bo_va->flags only contains the READABLE and WRITEABLE
	 * bits, but just in case we filter the flags here first.
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	trace_amdgpu_vm_bo_update(mapping);

	nptes = mapping->it.last - mapping->it.start + 1;

	/*
	 * reserve space for one command every (1 << BLOCK_SIZE)
	 * entries or 2k dwords (whatever is smaller)
	 */
	ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;

	/* padding, etc.
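	 * ndw is a worst-case bound on the IB size in dwords: 64 dwords of
	 * padding plus a per-command/per-PTE estimate that depends on which
	 * of the copy, write or set_pte_pde paths will be used (see below).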
	 */
	ndw = 64;

	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & AMDGPU_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!ib)
		return -ENOMEM;

	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
	if (r) {
		kfree(ib);
		return r;
	}

	ib->length_dw = 0;

	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
				  mapping->it.last + 1, addr + mapping->offset,
				  flags, gtt_flags);

	if (r) {
		amdgpu_ib_free(adev, ib);
		kfree(ib);
		return r;
	}

	amdgpu_vm_pad_ib(adev, ib);
	WARN_ON(ib->length_dw > ndw);
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
						 &amdgpu_vm_free_job,
						 AMDGPU_FENCE_OWNER_VM,
						 &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->page_directory, f, true);
	if (fence) {
		fence_put(*fence);
		*fence = fence_get(f);
	}
	fence_put(f);
	if (!amdgpu_enable_scheduler) {
		amdgpu_ib_free(adev, ib);
		kfree(ib);
	}
	return 0;

error_free:
	amdgpu_ib_free(adev, ib);
	kfree(ib);
	return r;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct amdgpu_vm *vm = bo_va->vm;
	struct amdgpu_bo_va_mapping *mapping;
	uint32_t flags;
	uint64_t addr;
	int r;

	if (mem) {
		addr = (u64)mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_TT)
			addr += adev->vm_manager.vram_base_offset;
	} else {
		addr = 0;
	}

	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);

	spin_lock(&vm->status_lock);
	if (!list_empty(&bo_va->vm_status))
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	spin_unlock(&vm->status_lock);

	list_for_each_entry(mapping, &bo_va->invalids, list) {
		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
						flags, &bo_va->last_pt_update);
		if (r)
			return r;
	}

	if (trace_amdgpu_vm_bo_mapping_enabled()) {
		list_for_each_entry(mapping, &bo_va->valids, list)
			trace_amdgpu_vm_bo_mapping(mapping);

		list_for_each_entry(mapping, &bo_va->invalids, list)
			trace_amdgpu_vm_bo_mapping(mapping);
	}

	spin_lock(&vm->status_lock);
	list_splice_init(&bo_va->invalids, &bo_va->valids);
	list_del_init(&bo_va->vm_status);
	if (!mem)
		list_add(&bo_va->vm_status, &vm->cleared);
	spin_unlock(&vm->status_lock);

	return 0;
}

/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
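 *
 * Note: vm->freed_lock is dropped around each call to
 * amdgpu_vm_bo_update_mapping() below and re-acquired afterwards.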
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	spin_lock(&vm->freed_lock);
	while (!list_empty(&vm->freed)) {
		mapping = list_first_entry(&vm->freed,
			struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);
		spin_unlock(&vm->freed_lock);
		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
		kfree(mapping);
		if (r)
			return r;

		spin_lock(&vm->freed_lock);
	}
	spin_unlock(&vm->freed_lock);

	return 0;

}

/**
 * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = NULL;
	int r = 0;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
			struct amdgpu_bo_va, vm_status);
		spin_unlock(&vm->status_lock);
		mutex_lock(&bo_va->mutex);
		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
		mutex_unlock(&bo_va->mutex);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	if (bo_va)
		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);

	return r;
}

/**
 * amdgpu_vm_bo_add - add a bo to a specific vm
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: amdgpu buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);
	INIT_LIST_HEAD(&bo_va->vm_status);
#ifdef __NetBSD__
	linux_mutex_init(&bo_va->mutex);
#else
	mutex_init(&bo_va->mutex);
#endif
	list_add_tail(&bo_va->bo_list, &bo->va);

	return bo_va;
}

/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
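 *
 * Besides inserting the mapping into the interval tree, this allocates
 * and clears any page table BOs needed to back the new range (see the
 * loop over pt_idx below).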
 */
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
		     struct amdgpu_bo_va *bo_va,
		     uint64_t saddr, uint64_t offset,
		     uint64_t size, uint32_t flags)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	struct interval_tree_node *it;
	unsigned last_pfn, pt_idx;
	uint64_t eaddr;
	int r;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	/* make sure the object fits at this offset */
	eaddr = saddr + size - 1;
	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
		return -EINVAL;

	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
	if (last_pfn >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
			last_pfn, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	spin_lock(&vm->it_lock);
	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
	spin_unlock(&vm->it_lock);
	if (it) {
		struct amdgpu_bo_va_mapping *tmp;
		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
		/* bo and tmp overlap, invalid addr */
		dev_err(adev->dev, "bo %p va 0x%010"PRIx64"-0x%010"PRIx64" conflict with "
			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
			tmp->it.start, tmp->it.last + 1);
		r = -EINVAL;
		goto error;
	}

	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping) {
		r = -ENOMEM;
		goto error;
	}

	INIT_LIST_HEAD(&mapping->list);
	mapping->it.start = saddr;
	mapping->it.last = eaddr;
	mapping->offset = offset;
	mapping->flags = flags;

	mutex_lock(&bo_va->mutex);
	list_add(&mapping->list, &bo_va->invalids);
	mutex_unlock(&bo_va->mutex);
	spin_lock(&vm->it_lock);
	interval_tree_insert(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_map(bo_va, mapping);

	/* Make sure the page tables are allocated */
	saddr >>= amdgpu_vm_block_size;
	eaddr >>= amdgpu_vm_block_size;

	BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));

	if (eaddr > vm->max_pde_used)
		vm->max_pde_used = eaddr;

	/* walk over the address space and allocate the page tables */
	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
		struct reservation_object *resv = vm->page_directory->tbo.resv;
		struct amdgpu_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
				     AMDGPU_GPU_PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_VRAM,
				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
				     NULL, resv, &pt);
		if (r)
			goto error_free;

		/* Keep a reference to the page table to avoid freeing
		 * them up in the wrong order.
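		 * (The new page table takes a reference on the page directory
		 * via pt->parent, so the directory outlives its page tables.)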
		 */
		pt->parent = amdgpu_bo_ref(vm->page_directory);

		r = amdgpu_vm_clear_bo(adev, pt);
		if (r) {
			amdgpu_bo_unref(&pt);
			goto error_free;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	return 0;

error_free:
	list_del(&mapping->list);
	spin_lock(&vm->it_lock);
	interval_tree_remove(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
	kfree(mapping);

error:
	return r;
}

/**
 * amdgpu_vm_bo_unmap - remove bo mapping from vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 *
 * Remove a mapping of the BO at the specified addr from the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       struct amdgpu_bo_va *bo_va,
		       uint64_t saddr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	bool valid = true;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	mutex_lock(&bo_va->mutex);
	list_for_each_entry(mapping, &bo_va->valids, list) {
		if (mapping->it.start == saddr)
			break;
	}

	if (&mapping->list == &bo_va->valids) {
		valid = false;

		list_for_each_entry(mapping, &bo_va->invalids, list) {
			if (mapping->it.start == saddr)
				break;
		}

		if (&mapping->list == &bo_va->invalids) {
			mutex_unlock(&bo_va->mutex);
			return -ENOENT;
		}
	}
	mutex_unlock(&bo_va->mutex);
	list_del(&mapping->list);
	spin_lock(&vm->it_lock);
	interval_tree_remove(&mapping->it, &vm->va);
	spin_unlock(&vm->it_lock);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);

	if (valid) {
		spin_lock(&vm->freed_lock);
		list_add(&mapping->list, &vm->freed);
		spin_unlock(&vm->freed_lock);
	} else {
		kfree(mapping);
	}

	return 0;
}

/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
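 *
 * Mappings still on the valids list are moved to vm->freed so that their
 * page table entries can be cleared later by amdgpu_vm_clear_freed();
 * invalid mappings are freed immediately.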
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->vm_status);
	spin_unlock(&vm->status_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		spin_lock(&vm->it_lock);
		interval_tree_remove(&mapping->it, &vm->va);
		spin_unlock(&vm->it_lock);
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		spin_lock(&vm->freed_lock);
		list_add(&mapping->list, &vm->freed);
		spin_unlock(&vm->freed_lock);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		spin_lock(&vm->it_lock);
		interval_tree_remove(&mapping->it, &vm->va);
		spin_unlock(&vm->it_lock);
		kfree(mapping);
	}
	fence_put(bo_va->last_pt_update);
#ifdef __NetBSD__
	linux_mutex_destroy(&bo_va->mutex);
#else
	mutex_destroy(&bo_va->mutex);
#endif
	kfree(bo_va);
}

/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		spin_lock(&bo_va->vm->status_lock);
		if (list_empty(&bo_va->vm_status))
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
		spin_unlock(&bo_va->vm->status_lock);
	}
}

/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
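 *
 * Allocates the page table array and the page directory BO, and clears
 * the page directory with amdgpu_vm_clear_bo() before returning.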
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
		AMDGPU_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		vm->ids[i].id = 0;
		vm->ids[i].flushed_updates = NULL;
	}
#ifdef __NetBSD__
	interval_tree_init(&vm->va);
#else
	vm->va = RB_ROOT;
#endif
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->cleared);
	INIT_LIST_HEAD(&vm->freed);
	spin_lock_init(&vm->it_lock);
	spin_lock_init(&vm->freed_lock);
	pd_size = amdgpu_vm_directory_size(adev);
	pd_entries = amdgpu_vm_num_pdes(adev);

	/* allocate page table array */
	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	vm->page_directory_fence = NULL;

	r = amdgpu_bo_create(adev, pd_size, align, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
			     NULL, NULL, &vm->page_directory);
	if (r)
		return r;
	r = amdgpu_bo_reserve(vm->page_directory, false);
	if (r) {
		amdgpu_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}
	r = amdgpu_vm_clear_bo(adev, vm->page_directory);
	amdgpu_bo_unreserve(vm->page_directory);
	if (r) {
		amdgpu_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	int i;

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(adev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
		list_del(&mapping->list);
		interval_tree_remove(&mapping->it, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		list_del(&mapping->list);
		kfree(mapping);
	}

	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
		amdgpu_bo_unref(&vm->page_tables[i].bo);
	drm_free_large(vm->page_tables);

	amdgpu_bo_unref(&vm->page_directory);
	fence_put(vm->page_directory_fence);
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		unsigned id = vm->ids[i].id;

		atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
				    (long)vm, 0);
		fence_put(vm->ids[i].flushed_updates);
	}

}

/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	unsigned i;

	for (i = 0; i < AMDGPU_NUM_VM; ++i)
		fence_put(adev->vm_manager.ids[i].active);
}