/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"

/**
 * DOC: GPUVM
 *
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)

#undef START
#undef LAST

/**
 * struct amdgpu_pte_update_params - Local structure
 *
 * Encapsulate some VM table update parameters to reduce
 * the number of function parameters
 *
 */
struct amdgpu_pte_update_params {

	/**
	 * @adev: amdgpu device we do this update for
	 */
	struct amdgpu_device *adev;

	/**
	 * @vm: optional amdgpu_vm we do this update for
	 */
	struct amdgpu_vm *vm;

	/**
	 * @src: address where to copy page table entries from
	 */
	uint64_t src;

	/**
	 * @ib: indirect buffer to fill with commands
	 */
	struct amdgpu_ib *ib;

	/**
	 * @func: Function which actually does the update
	 */
	void (*func)(struct amdgpu_pte_update_params *params,
		     struct amdgpu_bo *bo, uint64_t pe,
		     uint64_t addr, unsigned count, uint32_t incr,
		     uint64_t flags);
	/**
	 * @pages_addr:
	 *
	 * DMA addresses to use for mapping, used during VM update by CPU
	 */
	dma_addr_t *pages_addr;

	/**
	 * @kptr:
	 *
	 * Kernel pointer of PD/PT BO that needs to be updated,
	 * used during VM update by CPU
	 */
	void *kptr;
};

/**
 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
 */
struct amdgpu_prt_cb {

	/**
	 * @adev: amdgpu device
	 */
	struct amdgpu_device *adev;

	/**
	 * @cb: callback
	 */
	struct dma_fence_cb cb;
};

/**
 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
 *
 * @base: base structure for tracking BO usage in a VM
 * @vm: vm to which bo is to be added
 * @bo: amdgpu buffer object
 *
 * Initialize a bo_va_base structure and add it to the appropriate lists
 *
 */
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo *bo)
{
	base->vm = vm;
	base->bo = bo;
	INIT_LIST_HEAD(&base->bo_list);
	INIT_LIST_HEAD(&base->vm_status);

	if (!bo)
		return;
	list_add_tail(&base->bo_list, &bo->va);

	if (bo->tbo.type == ttm_bo_type_kernel)
		list_move(&base->vm_status, &vm->relocated);

	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
		return;

	if (bo->preferred_domains &
	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
		return;

	/*
	 * we checked all the prerequisites, but it looks like this per vm bo
	 * is currently evicted. add the bo to the evicted list to make sure it
	 * is validated on next vm use to avoid fault.
	 */
	list_move_tail(&base->vm_status, &vm->evicted);
	base->moved = true;
}

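/*
 * Note on the page table walk (illustrative only, assuming a four level
 * Vega10 style layout with root_level == AMDGPU_VM_PDB2 and a block_size
 * of 9): amdgpu_vm_level_shift() below yields 9 * (PDB0 - level) + 9 for
 * the directories and 0 for the PTB, so a GPU pfn is decomposed as
 *
 *	PDB2 index = pfn >> 27
 *	PDB1 index = (pfn >> 18) & 0x1ff
 *	PDB0 index = (pfn >> 9)  & 0x1ff
 *	PTB  index = pfn & 0x1ff
 *
 * Other block sizes simply move the PDB0/PTB split accordingly.
 */
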
/**
 * amdgpu_vm_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = 0xff;

	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		shift = 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
		break;
	case AMDGPU_VM_PTB:
		shift = 0;
		break;
	default:
		dev_err(adev->dev, "the level%d isn't supported.\n", level);
	}

	return shift;
}

/**
 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = amdgpu_vm_level_shift(adev,
					       adev->vm_manager.root_level);

	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;
	else
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->root.base.bo;
	entry->priority = 0;
	entry->tv.bo = &entry->robj->tbo;
	entry->tv.shared = true;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}

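/*
 * Illustrative usage sketch for amdgpu_vm_get_pd_bo() (not taken from a
 * real caller, the names are made up): a command submission path puts the
 * root PD on its TTM validation list before reserving everything, roughly
 *
 *	struct list_head validated;
 *	struct amdgpu_bo_list_entry pd_entry;
 *
 *	INIT_LIST_HEAD(&validated);
 *	amdgpu_vm_get_pd_bo(vm, &validated, &pd_entry);
 *	// ...reserve the list, then call amdgpu_vm_validate_pt_bos()...
 *
 * so that the PD is reserved together with the other BOs of the job.
 */
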
/**
 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 *
 * Returns:
 * Validation result.
 */
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*validate)(void *p, struct amdgpu_bo *bo),
			      void *param)
{
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r = 0;

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (bo->parent) {
			r = validate(param, bo);
			if (r)
				break;

			spin_lock(&glob->lru_lock);
			ttm_bo_move_to_lru_tail(&bo->tbo);
			if (bo->shadow)
				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
			spin_unlock(&glob->lru_lock);
		}

		if (bo->tbo.type != ttm_bo_type_kernel) {
			spin_lock(&vm->moved_lock);
			list_move(&bo_base->vm_status, &vm->moved);
			spin_unlock(&vm->moved_lock);
		} else {
			list_move(&bo_base->vm_status, &vm->relocated);
		}
	}

	spin_lock(&glob->lru_lock);
	list_for_each_entry(bo_base, &vm->idle, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (!bo->parent)
			continue;

		ttm_bo_move_to_lru_tail(&bo->tbo);
		if (bo->shadow)
			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
	}
	spin_unlock(&glob->lru_lock);

	return r;
}

/**
 * amdgpu_vm_ready - check VM is ready for updates
 *
 * @vm: VM to check
 *
 * Check if all VM PDs/PTs are ready for updates
 *
 * Returns:
 * True if eviction list is empty.
 */
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
	return list_empty(&vm->evicted);
}

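/*
 * Minimal sketch of a @validate callback as passed to
 * amdgpu_vm_validate_pt_bos() above (hypothetical, for illustration only):
 *
 *	static int example_validate_cb(void *param, struct amdgpu_bo *bo)
 *	{
 *		struct ttm_operation_ctx ctx = { true, false };
 *
 *		return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 *	}
 *
 * The real command submission code does more bookkeeping; this only shows
 * the expected signature and return convention (0 on success).
 */
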
/**
 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @bo: BO to clear
 * @level: level this BO is at
 * @pte_support_ats: indicate ATS support from PTE
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
			      unsigned level, bool pte_support_ats)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct dma_fence *fence = NULL;
	unsigned entries, ats_entries;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	uint64_t addr;
	int r;

	entries = amdgpu_bo_size(bo) / 8;

	if (pte_support_ats) {
		if (level == adev->vm_manager.root_level) {
			ats_entries = amdgpu_vm_level_shift(adev, level);
			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
			ats_entries = min(ats_entries, entries);
			entries -= ats_entries;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	} else {
		ats_entries = 0;
	}

	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		goto error;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto error;

	addr = amdgpu_bo_gpu_offset(bo);
	if (ats_entries) {
		uint64_t ats_value;

		ats_value = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB)
			ats_value |= AMDGPU_PDE_PTE;

		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      ats_entries, 0, ats_value);
		addr += ats_entries * 8;
	}

	if (entries)
		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      entries, 0, 0);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	WARN_ON(job->ibs[0].length_dw > 64);
	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
			      &fence);
	if (r)
		goto error_free;

	amdgpu_bo_fence(bo, fence, true);
	dma_fence_put(fence);

	if (bo->shadow)
		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
					  level, pte_support_ats);

	return 0;

error_free:
	amdgpu_job_free(job);

error:
	return r;
}

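/*
 * Worked example for the ATS split above (illustrative only, assuming a
 * Vega10 style configuration with block_size == 9, root_level ==
 * AMDGPU_VM_PDB2 and AMDGPU_VA_HOLE_START == 1ULL << 47): the level shift
 * of the root is 27, plus AMDGPU_GPU_PAGE_SHIFT (12) gives 39, so
 * ats_entries = (1ULL << 47) >> 39 = 256. In other words the root PD
 * entries covering the low half of the address space get the default ATC
 * value while the rest are cleared to zero.
 */
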
/**
 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @parent: parent PT
 * @saddr: start of the address range
 * @eaddr: end of the address range
 * @level: VMPT level
 * @ats: indicate ATS support from PTE
 *
 * Make sure the page directories and page tables are allocated
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt *parent,
				  uint64_t saddr, uint64_t eaddr,
				  unsigned level, bool ats)
{
	unsigned shift = amdgpu_vm_level_shift(adev, level);
	unsigned pt_idx, from, to;
	u64 flags;
	int r;

	if (!parent->entries) {
		unsigned num_entries = amdgpu_vm_num_entries(adev, level);

		parent->entries = kvmalloc_array(num_entries,
						 sizeof(struct amdgpu_vm_pt),
						 GFP_KERNEL | __GFP_ZERO);
		if (!parent->entries)
			return -ENOMEM;
		memset(parent->entries, 0, sizeof(struct amdgpu_vm_pt));
	}

	from = saddr >> shift;
	to = eaddr >> shift;
	if (from >= amdgpu_vm_num_entries(adev, level) ||
	    to >= amdgpu_vm_num_entries(adev, level))
		return -EINVAL;

	++level;
	saddr = saddr & ((1 << shift) - 1);
	eaddr = eaddr & ((1 << shift) - 1);

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if (vm->root.base.bo->shadow)
		flags |= AMDGPU_GEM_CREATE_SHADOW;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;

	/* walk over the address space and allocate the page tables */
	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *pt;

		if (!entry->base.bo) {
			struct amdgpu_bo_param bp;

			memset(&bp, 0, sizeof(bp));
			bp.size = amdgpu_vm_bo_size(adev, level);
			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
			bp.flags = flags;
			bp.type = ttm_bo_type_kernel;
			bp.resv = resv;
			r = amdgpu_bo_create(adev, &bp, &pt);
			if (r)
				return r;

			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
			if (r) {
				amdgpu_bo_unref(&pt->shadow);
				amdgpu_bo_unref(&pt);
				return r;
			}

			if (vm->use_cpu_for_update) {
				r = amdgpu_bo_kmap(pt, NULL);
				if (r) {
					amdgpu_bo_unref(&pt->shadow);
					amdgpu_bo_unref(&pt);
					return r;
				}
			}

			/* Keep a reference to the root directory to avoid
			 * freeing them up in the wrong order.
			 */
			pt->parent = amdgpu_bo_ref(parent->base.bo);

			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
		}

		if (level < AMDGPU_VM_PTB) {
			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
				((1 << shift) - 1);
			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
						   sub_eaddr, level, ats);
			if (r)
				return r;
		}
	}

	return 0;
}

/**
 * amdgpu_vm_alloc_pts - Allocate page tables.
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @saddr: Start address which needs to be allocated
 * @size: Size from start address we need.
 *
 * Make sure the page tables are allocated.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size)
{
	uint64_t eaddr;
	bool ats = false;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	eaddr = saddr + size - 1;

	if (vm->pte_support_ats)
		ats = saddr < AMDGPU_VA_HOLE_START;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	if (eaddr >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
			eaddr, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
				      adev->vm_manager.root_level, ats);
}

/**
 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 *
 * @adev: amdgpu_device pointer
 */
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
	const struct amdgpu_ip_block *ip_block;
	bool has_compute_vm_bug;
	struct amdgpu_ring *ring;
	int i;

	has_compute_vm_bug = false;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block) {
		/* Compute has a VM bug for GFX version < 7.
		 * Compute has a VM bug for GFX 8 MEC firmware version < 673.
		 */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}

/**
 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
 *
 * @ring: ring on which the job will be submitted
 * @job: job to submit
 *
 * Returns:
 * True if sync is needed.
 */
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	if (job->vmid == 0)
		return false;
	id = &id_mgr->ids[job->vmid];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vmid_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job: related job
 * @need_pipe_sync: is pipe sync needed
 *
 * Emit a VM flush when it is necessary.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);
	bool vm_flush_needed = job->vm_needs_flush;
	struct dma_fence *fence = NULL;
	bool pasid_mapping_needed = false;
	unsigned patch_offset = 0;
	int r;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
	}

	mutex_lock(&id_mgr->lock);
	if (id->pasid != job->pasid || !id->pasid_mapping ||
	    !dma_fence_is_signaled(id->pasid_mapping))
		pasid_mapping_needed = true;
	mutex_unlock(&id_mgr->lock);

	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
		ring->funcs->emit_wreg;

	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (vm_flush_needed) {
		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
	}

	if (pasid_mapping_needed)
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (vm_flush_needed || pasid_mapping_needed) {
		r = amdgpu_fence_emit(ring, &fence, 0);
		if (r)
			return r;
	}

	if (vm_flush_needed) {
		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = dma_fence_get(fence);
		id->current_gpu_reset_count =
			atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	if (pasid_mapping_needed) {
		mutex_lock(&id_mgr->lock);
		id->pasid = job->pasid;
		dma_fence_put(id->pasid_mapping);
		id->pasid_mapping = dma_fence_get(fence);
		mutex_unlock(&id_mgr->lock);
	}
	dma_fence_put(fence);

	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
		id->gds_base = job->gds_base;
		id->gds_size = job->gds_size;
		id->gws_base = job->gws_base;
		id->gws_size = job->gws_size;
		id->oa_base = job->oa_base;
		id->oa_size = job->oa_size;
		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	return 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's vm list for the requested vm
 * Returns the found bo_va or NULL if none is found
 *
 * Object has to be reserved!
 *
 * Returns:
 * Found bo_va or NULL.
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
		if (bo_va->base.vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * amdgpu_vm_do_set_ptes - helper to call the right asic function
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
				  struct amdgpu_bo *bo,
				  uint64_t pe, uint64_t addr,
				  unsigned count, uint32_t incr,
				  uint64_t flags)
{
	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	if (count < 3) {
		amdgpu_vm_write_pte(params->adev, params->ib, pe,
				    addr | flags, count, incr);

	} else {
		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	uint64_t src = (params->src + (addr >> 12) * 8);

	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_copy_ptes(pe, src, count);

	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to.
 *
 * Returns:
 * The pointer for the page table entry.
 */
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size*/
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

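/*
 * Worked example for amdgpu_vm_map_gart() (illustrative only): with 4K CPU
 * pages, addr >> PAGE_SHIFT selects the pages_addr[] slot and the final
 * mask aligns the result to a 4K GPU page. The intermediate
 * "result |= addr & (~PAGE_MASK)" only matters when the CPU page size is
 * larger than the 4K GPU page size, e.g. with 64K pages it keeps bits
 * [15:12] of the offset so the right 4K sub-page is addressed.
 */
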
/**
 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: kmap addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Write count number of PT/PD entries directly.
 */
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	unsigned int i;
	uint64_t value;

	pe += (unsigned long)amdgpu_bo_kptr(bo);

	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	for (i = 0; i < count; i++) {
		value = params->pages_addr ?
			amdgpu_vm_map_gart(params->pages_addr, addr) :
			addr;
		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
				       i, value, flags);
		addr += incr;
	}
}


/**
 * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @owner: fence owner
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			     void *owner)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

	return r;
}

/*
 * amdgpu_vm_update_pde - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @vm: requested vm
 * @parent: parent directory
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
				 struct amdgpu_vm *vm,
				 struct amdgpu_vm_pt *parent,
				 struct amdgpu_vm_pt *entry)
{
	struct amdgpu_bo *bo = parent->base.bo, *pbo;
	uint64_t pde, pt, flags;
	unsigned level;

	/* Don't update huge pages here */
	if (entry->huge)
		return;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	pt = amdgpu_bo_gpu_offset(entry->base.bo);
	flags = AMDGPU_PTE_VALID;
	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
	pde = (entry - parent->entries) * 8;
	if (bo->shadow)
		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
	params->func(params, bo, pde, pt, 1, 0, flags);
}

/*
 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @parent: parent PD
 * @level: VMPT level
 *
 * Mark all PD levels as invalid after an error.
 */
static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm,
				       struct amdgpu_vm_pt *parent,
				       unsigned level)
{
	unsigned pt_idx, num_entries;

	/*
	 * Recurse into the subdirectories. This recursion is harmless because
	 * we only have a maximum of 5 layers.
	 */
	num_entries = amdgpu_vm_num_entries(adev, level);
	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];

		if (!entry->base.bo)
			continue;

		if (!entry->base.moved)
			list_move(&entry->base.vm_status, &vm->relocated);
		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
	}
}

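/*
 * Rough per-submission flow, as a reminder of how the helpers below fit
 * together (simplified sketch, error handling omitted, the exact order is
 * up to the caller such as the CS ioctl or KFD):
 *
 *	amdgpu_vm_validate_pt_bos(adev, vm, cb, param);
 *	amdgpu_vm_update_directories(adev, vm);   // PDEs of relocated PDs/PTs
 *	amdgpu_vm_bo_update(adev, bo_va, false);  // PTEs of moved BOs
 *	amdgpu_vm_clear_freed(adev, vm, &fence);  // PTEs of unmapped ranges
 *	amdgpu_vm_flush(ring, job, need_pipe_sync);
 *
 * This is only an illustration, not a fixed contract.
 */
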
/*
 * amdgpu_vm_update_directories - make sure that all directories are valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Makes sure all directories are up to date.
 *
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm)
{
	struct amdgpu_pte_update_params params;
	struct amdgpu_job *job;
	unsigned ndw = 0;
	int r = 0;

	if (list_empty(&vm->relocated))
		return 0;

restart:
	memset(&params, 0, sizeof(params));
	params.adev = adev;

	if (vm->use_cpu_for_update) {
		struct amdgpu_vm_bo_base *bo_base;

		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
			r = amdgpu_bo_kmap(bo_base->bo, NULL);
			if (unlikely(r))
				return r;
		}

		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		ndw = 512 * 8;
		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
		if (r)
			return r;

		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}

	while (!list_empty(&vm->relocated)) {
		struct amdgpu_vm_bo_base *bo_base, *parent;
		struct amdgpu_vm_pt *pt, *entry;
		struct amdgpu_bo *bo;

		bo_base = list_first_entry(&vm->relocated,
					   struct amdgpu_vm_bo_base,
					   vm_status);
		bo_base->moved = false;
		list_del_init(&bo_base->vm_status);

		bo = bo_base->bo->parent;
		if (!bo)
			continue;

		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
					  bo_list);
		pt = container_of(parent, struct amdgpu_vm_pt, base);
		entry = container_of(bo_base, struct amdgpu_vm_pt, base);

		amdgpu_vm_update_pde(&params, vm, pt, entry);

		if (!vm->use_cpu_for_update &&
		    (ndw - params.ib->length_dw) < 32)
			break;
	}

	if (vm->use_cpu_for_update) {
		/* Flush HDP */
		mb();
		amdgpu_asic_flush_hdp(adev, NULL);
	} else if (params.ib->length_dw == 0) {
		amdgpu_job_free(job);
	} else {
		struct amdgpu_bo *root = vm->root.base.bo;
		struct amdgpu_ring *ring;
		struct dma_fence *fence;

		ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
				    sched);

		amdgpu_ring_pad_ib(ring, params.ib);
		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
				 AMDGPU_FENCE_OWNER_VM, false);
		WARN_ON(params.ib->length_dw > ndw);
		r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
				      &fence);
		if (r)
			goto error;

		amdgpu_bo_fence(root, fence, true);
		dma_fence_put(vm->last_update);
		vm->last_update = fence;
	}

	if (!list_empty(&vm->relocated))
		goto restart;

	return 0;

error:
	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
				   adev->vm_manager.root_level);
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_get_entry - find the entry for an address
 *
 * @p: see amdgpu_pte_update_params definition
 * @addr: virtual address in question
 * @entry: resulting entry or NULL
 * @parent: parent entry
 *
 * Find the vm_pt entry and its parent for the given address.
 */
void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
			 struct amdgpu_vm_pt **entry,
			 struct amdgpu_vm_pt **parent)
{
	unsigned level = p->adev->vm_manager.root_level;

	*parent = NULL;
	*entry = &p->vm->root;
	while ((*entry)->entries) {
		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);

		*parent = *entry;
		*entry = &(*entry)->entries[addr >> shift];
		addr &= (1ULL << shift) - 1;
	}

	if (level != AMDGPU_VM_PTB)
		*entry = NULL;
}

/**
 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
 *
 * @p: see amdgpu_pte_update_params definition
 * @entry: vm_pt entry to check
 * @parent: parent entry
 * @nptes: number of PTEs updated with this operation
 * @dst: destination address where the PTEs should point to
 * @flags: access flags for the PTEs
 *
 * Check if we can update the PD with a huge page.
 */
static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
					struct amdgpu_vm_pt *entry,
					struct amdgpu_vm_pt *parent,
					unsigned nptes, uint64_t dst,
					uint64_t flags)
{
	uint64_t pde;

	/* In the case of a mixed PT the PDE must point to it*/
	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
		/* Set the huge page flag to stop scanning at this PDE */
		flags |= AMDGPU_PDE_PTE;
	}

	if (!(flags & AMDGPU_PDE_PTE)) {
		if (entry->huge) {
			/* Add the entry to the relocated list to update it. */
			entry->huge = false;
			list_move(&entry->base.vm_status, &p->vm->relocated);
		}
		return;
	}

	entry->huge = true;
	amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);

	pde = (entry - parent->entries) * 8;
	if (parent->base.bo->shadow)
		p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
	p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @params: see amdgpu_pte_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;

	uint64_t addr, pe_start;
	struct amdgpu_bo *pt;
	unsigned nptes;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; addr += nptes,
	     dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
		struct amdgpu_vm_pt *entry, *parent;

		amdgpu_vm_get_entry(params, addr, &entry, &parent);
		if (!entry)
			return -ENOENT;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);

		amdgpu_vm_handle_huge_pages(params, entry, parent,
					    nptes, dst, flags);
		/* We don't need to update PTEs for huge pages */
		if (entry->huge)
			continue;

		pt = entry->base.bo;
		pe_start = (addr & mask) * 8;
		if (pt->shadow)
			params->func(params, pt->shadow, pe_start, dst, nptes,
				     AMDGPU_GPU_PAGE_SIZE, flags);
		params->func(params, pt, pe_start, dst, nptes,
			     AMDGPU_GPU_PAGE_SIZE, flags);
	}

	return 0;
}

/*
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @params: see amdgpu_pte_update_params definition
 * @vm: requested vm
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @dst: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
			       uint64_t start, uint64_t end,
			       uint64_t dst, uint64_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
1320 */ 1321 unsigned max_frag = params->adev->vm_manager.fragment_size; 1322 int r; 1323 1324 /* system pages are non continuously */ 1325 if (params->src || !(flags & AMDGPU_PTE_VALID)) 1326 return amdgpu_vm_update_ptes(params, start, end, dst, flags); 1327 1328 while (start != end) { 1329 uint64_t frag_flags, frag_end; 1330 unsigned frag; 1331 1332 /* This intentionally wraps around if no bit is set */ 1333 frag = min((unsigned)ffs(start) - 1, 1334 (unsigned)fls64(end - start) - 1); 1335 if (frag >= max_frag) { 1336 frag_flags = AMDGPU_PTE_FRAG(max_frag); 1337 frag_end = end & ~((1ULL << max_frag) - 1); 1338 } else { 1339 frag_flags = AMDGPU_PTE_FRAG(frag); 1340 frag_end = start + (1 << frag); 1341 } 1342 1343 r = amdgpu_vm_update_ptes(params, start, frag_end, dst, 1344 flags | frag_flags); 1345 if (r) 1346 return r; 1347 1348 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; 1349 start = frag_end; 1350 } 1351 1352 return 0; 1353 } 1354 1355 /** 1356 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table 1357 * 1358 * @adev: amdgpu_device pointer 1359 * @exclusive: fence we need to sync to 1360 * @pages_addr: DMA addresses to use for mapping 1361 * @vm: requested vm 1362 * @start: start of mapped range 1363 * @last: last mapped entry 1364 * @flags: flags for the entries 1365 * @addr: addr to set the area to 1366 * @fence: optional resulting fence 1367 * 1368 * Fill in the page table entries between @start and @last. 1369 * 1370 * Returns: 1371 * 0 for success, -EINVAL for failure. 1372 */ 1373 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, 1374 struct dma_fence *exclusive, 1375 dma_addr_t *pages_addr, 1376 struct amdgpu_vm *vm, 1377 uint64_t start, uint64_t last, 1378 uint64_t flags, uint64_t addr, 1379 struct dma_fence **fence) 1380 { 1381 struct amdgpu_ring *ring; 1382 void *owner = AMDGPU_FENCE_OWNER_VM; 1383 unsigned nptes, ncmds, ndw; 1384 struct amdgpu_job *job; 1385 struct amdgpu_pte_update_params params; 1386 struct dma_fence *f = NULL; 1387 int r; 1388 1389 memset(¶ms, 0, sizeof(params)); 1390 params.adev = adev; 1391 params.vm = vm; 1392 1393 /* sync to everything on unmapping */ 1394 if (!(flags & AMDGPU_PTE_VALID)) 1395 owner = AMDGPU_FENCE_OWNER_UNDEFINED; 1396 1397 if (vm->use_cpu_for_update) { 1398 /* params.src is used as flag to indicate system Memory */ 1399 if (pages_addr) 1400 params.src = ~0; 1401 1402 /* Wait for PT BOs to be free. PTs share the same resv. object 1403 * as the root PD BO 1404 */ 1405 r = amdgpu_vm_wait_pd(adev, vm, owner); 1406 if (unlikely(r)) 1407 return r; 1408 1409 params.func = amdgpu_vm_cpu_set_ptes; 1410 params.pages_addr = pages_addr; 1411 return amdgpu_vm_frag_ptes(¶ms, start, last + 1, 1412 addr, flags); 1413 } 1414 1415 ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); 1416 1417 nptes = last - start + 1; 1418 1419 /* 1420 * reserve space for two commands every (1 << BLOCK_SIZE) 1421 * entries or 2k dwords (whatever is smaller) 1422 * 1423 * The second command is for the shadow pagetables. 1424 */ 1425 if (vm->root.base.bo->shadow) 1426 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; 1427 else 1428 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); 1429 1430 /* padding, etc. 
/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       struct dma_fence *exclusive,
				       dma_addr_t *pages_addr,
				       struct amdgpu_vm *vm,
				       uint64_t start, uint64_t last,
				       uint64_t flags, uint64_t addr,
				       struct dma_fence **fence)
{
	struct amdgpu_ring *ring;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned nptes, ncmds, ndw;
	struct amdgpu_job *job;
	struct amdgpu_pte_update_params params;
	struct dma_fence *f = NULL;
	int r;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	if (vm->use_cpu_for_update) {
		/* params.src is used as flag to indicate system Memory */
		if (pages_addr)
			params.src = ~0;

		/* Wait for PT BOs to be free. PTs share the same resv. object
		 * as the root PD BO
		 */
		r = amdgpu_vm_wait_pd(adev, vm, owner);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
		params.pages_addr = pages_addr;
		return amdgpu_vm_frag_ptes(&params, start, last + 1,
					   addr, flags);
	}

	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);

	nptes = last - start + 1;

	/*
	 * reserve space for two commands every (1 << BLOCK_SIZE)
	 * entries or 2k dwords (whatever is smaller)
	 *
	 * The second command is for the shadow pagetables.
	 */
	if (vm->root.base.bo->shadow)
		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
	else
		ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);

	/* padding, etc. */
	ndw = 64;

	if (pages_addr) {
		/* copy commands needed */
		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;

		/* and also PTEs */
		ndw += nptes * 2;

		params.func = amdgpu_vm_do_copy_ptes;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* extra commands for begin/end fragments */
		if (vm->root.base.bo->shadow)
			ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
		else
			ndw += 2 * 10 * adev->vm_manager.fragment_size;

		params.func = amdgpu_vm_do_set_ptes;
	}

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	params.ib = &job->ibs[0];

	if (pages_addr) {
		uint64_t *pte;
		unsigned i;

		/* Put the PTEs at the end of the IB. */
		i = ndw - nptes * 2;
		pte = (uint64_t *)&(job->ibs->ptr[i]);
		params.src = job->ibs->gpu_addr + i * 4;

		for (i = 0; i < nptes; ++i) {
			pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
						    AMDGPU_GPU_PAGE_SIZE);
			pte[i] |= flags;
		}
		addr = 0;
	}

	r = amdgpu_sync_fence(adev, &job->sync, exclusive, false);
	if (r)
		goto error_free;

	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
			     owner, false);
	if (r)
		goto error_free;

	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
	if (r)
		goto error_free;

	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
	if (r)
		goto error_free;

	amdgpu_ring_pad_ib(ring, params.ib);
	WARN_ON(params.ib->length_dw > ndw);
	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->root.base.bo, f, true);
	dma_fence_put(*fence);
	*fence = f;
	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @flags: HW flags for the mapping
 * @nodes: array of drm_mm_nodes with the MC addresses
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a SDMA IB.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct dma_fence *exclusive,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t flags,
				      struct drm_mm_node *nodes,
				      struct dma_fence **fence)
{
	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
	uint64_t pfn, start = mapping->start;
	int r;

	/* normally bo_va->flags only contains READABLE and WRITEABLE bits
	 * here, but to be safe we filter the flags anyway
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	flags &= ~AMDGPU_PTE_EXECUTABLE;
	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	flags &= ~AMDGPU_PTE_MTYPE_MASK;
	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);

	if ((mapping->flags & AMDGPU_PTE_PRT) &&
	    (adev->asic_type >= CHIP_VEGA10)) {
		flags |= AMDGPU_PTE_PRT;
		flags &= ~AMDGPU_PTE_VALID;
	}

	trace_amdgpu_vm_bo_update(mapping);

	pfn = mapping->offset >> PAGE_SHIFT;
	if (nodes) {
		while (pfn >= nodes->size) {
			pfn -= nodes->size;
			++nodes;
		}
	}

	do {
		dma_addr_t *dma_addr = NULL;
		uint64_t max_entries;
		uint64_t addr, last;

		if (nodes) {
			addr = nodes->start << PAGE_SHIFT;
			max_entries = (nodes->size - pfn) *
				AMDGPU_GPU_PAGES_IN_CPU_PAGE;
		} else {
			addr = 0;
			max_entries = S64_MAX;
		}

		if (pages_addr) {
			uint64_t count;

			max_entries = min(max_entries, 16ull * 1024ull);
			for (count = 1;
			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			     ++count) {
				uint64_t idx = pfn + count;

				if (pages_addr[idx] !=
				    (pages_addr[idx - 1] + PAGE_SIZE))
					break;
			}

			if (count < min_linear_pages) {
				addr = pfn << PAGE_SHIFT;
				dma_addr = pages_addr;
			} else {
				addr = pages_addr[pfn];
				max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			}

		} else if (flags & AMDGPU_PTE_VALID) {
			addr += adev->vm_manager.vram_base_offset;
			addr += pfn << PAGE_SHIFT;
		}

		last = min((uint64_t)mapping->last, start + max_entries - 1);
		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
		if (nodes && nodes->size == pfn) {
			pfn = 0;
			++nodes;
		}
		start = last + 1;

	} while (unlikely(start != mapping->last + 1));

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @clear: if true clear the entries
 *
 * Fill in the page table entries for @bo_va.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
1643 */ 1644 int amdgpu_vm_bo_update(struct amdgpu_device *adev, 1645 struct amdgpu_bo_va *bo_va, 1646 bool clear) 1647 { 1648 struct amdgpu_bo *bo = bo_va->base.bo; 1649 struct amdgpu_vm *vm = bo_va->base.vm; 1650 struct amdgpu_bo_va_mapping *mapping; 1651 dma_addr_t *pages_addr = NULL; 1652 struct ttm_mem_reg *mem; 1653 struct drm_mm_node *nodes; 1654 struct dma_fence *exclusive, **last_update; 1655 uint64_t flags; 1656 int r; 1657 1658 if (clear || !bo) { 1659 mem = NULL; 1660 nodes = NULL; 1661 exclusive = NULL; 1662 } else { 1663 struct ttm_dma_tt *ttm; 1664 1665 mem = &bo->tbo.mem; 1666 nodes = mem->mm_node; 1667 if (mem->mem_type == TTM_PL_TT) { 1668 ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); 1669 pages_addr = ttm->dma_address; 1670 } 1671 exclusive = reservation_object_get_excl(bo->tbo.resv); 1672 } 1673 1674 if (bo) 1675 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); 1676 else 1677 flags = 0x0; 1678 1679 if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) 1680 last_update = &vm->last_update; 1681 else 1682 last_update = &bo_va->last_pt_update; 1683 1684 if (!clear && bo_va->base.moved) { 1685 bo_va->base.moved = false; 1686 list_splice_init(&bo_va->valids, &bo_va->invalids); 1687 1688 } else if (bo_va->cleared != clear) { 1689 list_splice_init(&bo_va->valids, &bo_va->invalids); 1690 } 1691 1692 list_for_each_entry(mapping, &bo_va->invalids, list) { 1693 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, 1694 mapping, flags, nodes, 1695 last_update); 1696 if (r) 1697 return r; 1698 } 1699 1700 if (vm->use_cpu_for_update) { 1701 /* Flush HDP */ 1702 mb(); 1703 amdgpu_asic_flush_hdp(adev, NULL); 1704 } 1705 1706 spin_lock(&vm->moved_lock); 1707 list_del_init(&bo_va->base.vm_status); 1708 spin_unlock(&vm->moved_lock); 1709 1710 /* If the BO is not in its preferred location add it back to 1711 * the evicted list so that it gets validated again on the 1712 * next command submission. 
1713 */ 1714 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { 1715 uint32_t mem_type = bo->tbo.mem.mem_type; 1716 1717 if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) 1718 list_add_tail(&bo_va->base.vm_status, &vm->evicted); 1719 else 1720 list_add(&bo_va->base.vm_status, &vm->idle); 1721 } 1722 1723 list_splice_init(&bo_va->invalids, &bo_va->valids); 1724 bo_va->cleared = clear; 1725 1726 if (trace_amdgpu_vm_bo_mapping_enabled()) { 1727 list_for_each_entry(mapping, &bo_va->valids, list) 1728 trace_amdgpu_vm_bo_mapping(mapping); 1729 } 1730 1731 return 0; 1732 } 1733 1734 /** 1735 * amdgpu_vm_update_prt_state - update the global PRT state 1736 * 1737 * @adev: amdgpu_device pointer 1738 */ 1739 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) 1740 { 1741 unsigned long flags; 1742 bool enable; 1743 1744 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); 1745 enable = !!atomic_read(&adev->vm_manager.num_prt_users); 1746 adev->gmc.gmc_funcs->set_prt(adev, enable); 1747 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); 1748 } 1749 1750 /** 1751 * amdgpu_vm_prt_get - add a PRT user 1752 * 1753 * @adev: amdgpu_device pointer 1754 */ 1755 static void amdgpu_vm_prt_get(struct amdgpu_device *adev) 1756 { 1757 if (!adev->gmc.gmc_funcs->set_prt) 1758 return; 1759 1760 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) 1761 amdgpu_vm_update_prt_state(adev); 1762 } 1763 1764 /** 1765 * amdgpu_vm_prt_put - drop a PRT user 1766 * 1767 * @adev: amdgpu_device pointer 1768 */ 1769 static void amdgpu_vm_prt_put(struct amdgpu_device *adev) 1770 { 1771 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0) 1772 amdgpu_vm_update_prt_state(adev); 1773 } 1774 1775 /** 1776 * amdgpu_vm_prt_cb - callback for updating the PRT status 1777 * 1778 * @fence: fence for the callback 1779 * @_cb: the callback function 1780 */ 1781 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) 1782 { 1783 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb); 1784 1785 amdgpu_vm_prt_put(cb->adev); 1786 kfree(cb); 1787 } 1788 1789 /** 1790 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status 1791 * 1792 * @adev: amdgpu_device pointer 1793 * @fence: fence for the callback 1794 */ 1795 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, 1796 struct dma_fence *fence) 1797 { 1798 struct amdgpu_prt_cb *cb; 1799 1800 if (!adev->gmc.gmc_funcs->set_prt) 1801 return; 1802 1803 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); 1804 if (!cb) { 1805 /* Last resort when we are OOM */ 1806 if (fence) 1807 dma_fence_wait(fence, false); 1808 1809 amdgpu_vm_prt_put(adev); 1810 } else { 1811 cb->adev = adev; 1812 if (!fence || dma_fence_add_callback(fence, &cb->cb, 1813 amdgpu_vm_prt_cb)) 1814 amdgpu_vm_prt_cb(fence, &cb->cb); 1815 } 1816 } 1817 1818 /** 1819 * amdgpu_vm_free_mapping - free a mapping 1820 * 1821 * @adev: amdgpu_device pointer 1822 * @vm: requested vm 1823 * @mapping: mapping to be freed 1824 * @fence: fence of the unmap operation 1825 * 1826 * Free a mapping and make sure we decrease the PRT usage count if applicable. 
 */
static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo_va_mapping *mapping,
				   struct dma_fence *fence)
{
	if (mapping->flags & AMDGPU_PTE_PRT)
		amdgpu_vm_add_prt_cb(adev, fence);
	kfree(mapping);
}

/**
 * amdgpu_vm_prt_fini - finish all prt mappings
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Register a cleanup callback to disable PRT support after VM dies.
 */
static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
	struct dma_fence *excl, **shared;
	unsigned i, shared_count;
	int r;

	r = reservation_object_get_fences_rcu(resv, &excl,
					      &shared_count, &shared);
	if (r) {
		/* Not enough memory to grab the fence list, as last resort
		 * block for all the fences to complete.
		 */
		reservation_object_wait_timeout_rcu(resv, true, false,
						    MAX_SCHEDULE_TIMEOUT);
		return;
	}

	/* Add a callback for each fence in the reservation object */
	amdgpu_vm_prt_get(adev);
	amdgpu_vm_add_prt_cb(adev, excl);

	for (i = 0; i < shared_count; ++i) {
		amdgpu_vm_prt_get(adev);
		amdgpu_vm_add_prt_cb(adev, shared[i]);
	}

	kfree(shared);
}

/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @fence: optional resulting fence (unchanged if no work needed to be done
 * or if an error occurred)
 *
 * Make sure all freed BOs are cleared in the PT.
 * PTs have to be reserved and mutex must be locked!
 *
 * Returns:
 * 0 for success.
 *
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm,
			  struct dma_fence **fence)
{
	struct amdgpu_bo_va_mapping *mapping;
	uint64_t init_pte_value = 0;
	struct dma_fence *f = NULL;
	int r;

	while (!list_empty(&vm->freed)) {
		mapping = list_first_entry(&vm->freed,
					   struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);

		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;

		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
						mapping->start, mapping->last,
						init_pte_value, 0, &f);
		amdgpu_vm_free_mapping(adev, vm, mapping, f);
		if (r) {
			dma_fence_put(f);
			return r;
		}
	}

	if (fence && f) {
		dma_fence_put(*fence);
		*fence = f;
	} else {
		dma_fence_put(f);
	}

	return 0;

}

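/*
 * Lifecycle note (summary of the code in this file, not a new rule):
 * mappings end up on vm->freed via amdgpu_vm_bo_unmap() and
 * amdgpu_vm_bo_clear_mappings() further below; amdgpu_vm_clear_freed()
 * above then writes the replacement PTEs (zero, or the default ATC value
 * when ATS covers the range) and drops the mapping, so an unmap only
 * becomes visible to the GPU after this call.
 */
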
/**
 * amdgpu_vm_handle_moved - handle moved BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all BOs which are moved are updated in the PTs.
 *
 * Returns:
 * 0 for success.
 *
 * PTs have to be reserved!
 */
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
			   struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va *bo_va, *tmp;
	struct list_head moved;
	bool clear;
	int r;

	INIT_LIST_HEAD(&moved);
	spin_lock(&vm->moved_lock);
	list_splice_init(&vm->moved, &moved);
	spin_unlock(&vm->moved_lock);

	list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
		struct reservation_object *resv = bo_va->base.bo->tbo.resv;

		/* Per VM BOs never need to be cleared in the page tables */
		if (resv == vm->root.base.bo->tbo.resv)
			clear = false;
		/* Try to reserve the BO to avoid clearing its ptes */
		else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
			clear = false;
		/* Somebody else is using the BO right now */
		else
			clear = true;

		r = amdgpu_vm_bo_update(adev, bo_va, clear);
		if (r) {
			spin_lock(&vm->moved_lock);
			list_splice(&moved, &vm->moved);
			spin_unlock(&vm->moved_lock);
			return r;
		}

		if (!clear && resv != vm->root.base.bo->tbo.resv)
			reservation_object_unlock(resv);

	}

	return 0;
}

/**
 * amdgpu_vm_bo_add - add a bo to a specific vm
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: amdgpu buffer object
 *
 * Add @bo into the requested vm.
 * Add @bo to the list of bos associated with the vm
 *
 * Returns:
 * Newly added bo_va or NULL for failure
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);

	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);

	return bo_va;
}


/**
 * amdgpu_vm_bo_insert_map - insert a new mapping
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @mapping: the mapping to insert
 *
 * Insert a new mapping into all structures.
 */
static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
				    struct amdgpu_bo_va *bo_va,
				    struct amdgpu_bo_va_mapping *mapping)
{
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo *bo = bo_va->base.bo;

	mapping->bo_va = bo_va;
	list_add(&mapping->list, &bo_va->invalids);
	amdgpu_vm_it_insert(mapping, &vm->va);

	if (mapping->flags & AMDGPU_PTE_PRT)
		amdgpu_vm_prt_get(adev);

	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
	    !bo_va->base.moved) {
		spin_lock(&vm->moved_lock);
		list_move(&bo_va->base.vm_status, &vm->moved);
		spin_unlock(&vm->moved_lock);
	}
	trace_amdgpu_vm_bo_map(bo_va, mapping);
}

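/*
 * Illustrative usage of the mapping helpers in this file (hypothetical
 * caller, roughly what the GEM VA ioctl does, error handling omitted):
 * with the BO reserved,
 *
 *	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *	r = amdgpu_vm_bo_map(adev, bo_va, va_address, 0,
 *			     amdgpu_bo_size(bo),
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 *
 * creates the bo_va and records the mapping; the PTEs themselves are only
 * written later by amdgpu_vm_bo_update().
 */
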
/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 *
 * Returns:
 * 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
		     struct amdgpu_bo_va *bo_va,
		     uint64_t saddr, uint64_t offset,
		     uint64_t size, uint64_t flags)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	struct amdgpu_bo *bo = bo_va->base.bo;
	struct amdgpu_vm *vm = bo_va->base.vm;
	uint64_t eaddr;

	/* validate the parameters */
	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
	    size == 0 || size & ~PAGE_MASK)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if (saddr >= eaddr ||
	    (bo && offset + size > amdgpu_bo_size(bo)))
		return -EINVAL;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
	if (tmp) {
		/* bo and tmp overlap, invalid addr */
		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
			tmp->start, tmp->last + 1);
		return -EINVAL;
	}

	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;

	mapping->start = saddr;
	mapping->last = eaddr;
	mapping->offset = offset;
	mapping->flags = flags;

	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);

	return 0;
}

/**
 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM. Replace existing
 * mappings as we do so.
 *
 * Returns:
 * 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
			     struct amdgpu_bo_va *bo_va,
			     uint64_t saddr, uint64_t offset,
			     uint64_t size, uint64_t flags)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo = bo_va->base.bo;
	uint64_t eaddr;
	int r;

	/* validate the parameters */
	if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
	    size == 0 || size & ~PAGE_MASK)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if (saddr >= eaddr ||
	    (bo && offset + size > amdgpu_bo_size(bo)))
		return -EINVAL;

	/* Allocate all the needed memory */
	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;

	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
	if (r) {
		kfree(mapping);
		return r;
	}

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	mapping->start = saddr;
	mapping->last = eaddr;
	mapping->offset = offset;
	mapping->flags = flags;

	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);

	return 0;
}
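/*
 * The two mapping helpers above differ only in how they treat overlaps:
 * amdgpu_vm_bo_map() rejects a range that intersects an existing mapping
 * with -EINVAL, while amdgpu_vm_bo_replace_map() first punches a hole with
 * amdgpu_vm_bo_clear_mappings() and then inserts the new mapping. A caller
 * that wants "remap" semantics (illustrative sketch, error handling omitted):
 *
 *	r = amdgpu_vm_bo_replace_map(adev, bo_va, saddr, 0, size,
 *				     AMDGPU_PTE_READABLE);
 *	// any previous mapping overlapping [saddr, saddr + size) is now on
 *	// vm->freed and will be torn down by amdgpu_vm_clear_freed()
 */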
/**
 * amdgpu_vm_bo_unmap - remove bo mapping from vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 *
 * Remove a mapping of the BO at the specified addr from the VM.
 *
 * Returns:
 * 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       struct amdgpu_bo_va *bo_va,
		       uint64_t saddr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->base.vm;
	bool valid = true;

	saddr /= AMDGPU_GPU_PAGE_SIZE;

	list_for_each_entry(mapping, &bo_va->valids, list) {
		if (mapping->start == saddr)
			break;
	}

	if (&mapping->list == &bo_va->valids) {
		valid = false;

		list_for_each_entry(mapping, &bo_va->invalids, list) {
			if (mapping->start == saddr)
				break;
		}

		if (&mapping->list == &bo_va->invalids)
			return -ENOENT;
	}

	list_del(&mapping->list);
	amdgpu_vm_it_remove(mapping, &vm->va);
	mapping->bo_va = NULL;
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);

	if (valid)
		list_add(&mapping->list, &vm->freed);
	else
		amdgpu_vm_free_mapping(adev, vm, mapping,
				       bo_va->last_pt_update);

	return 0;
}

/**
 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
 *
 * @adev: amdgpu_device pointer
 * @vm: VM structure to use
 * @saddr: start of the range
 * @size: size of the range
 *
 * Remove all mappings in a range, split them as appropriate.
 *
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
				struct amdgpu_vm *vm,
				uint64_t saddr, uint64_t size)
{
	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
	LIST_HEAD(removed);
	uint64_t eaddr;

	eaddr = saddr + size - 1;
	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	/* Allocate all the needed memory */
	before = kzalloc(sizeof(*before), GFP_KERNEL);
	if (!before)
		return -ENOMEM;
	INIT_LIST_HEAD(&before->list);

	after = kzalloc(sizeof(*after), GFP_KERNEL);
	if (!after) {
		kfree(before);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&after->list);

	/* Now gather all removed mappings */
	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
	while (tmp) {
		/* Remember mapping split at the start */
		if (tmp->start < saddr) {
			before->start = tmp->start;
			before->last = saddr - 1;
			before->offset = tmp->offset;
			before->flags = tmp->flags;
			before->bo_va = tmp->bo_va;
			list_add(&before->list, &tmp->bo_va->invalids);
		}

		/* Remember mapping split at the end */
		if (tmp->last > eaddr) {
			after->start = eaddr + 1;
			after->last = tmp->last;
			after->offset = tmp->offset;
			after->offset += (after->start - tmp->start) << PAGE_SHIFT;
			after->flags = tmp->flags;
			after->bo_va = tmp->bo_va;
			list_add(&after->list, &tmp->bo_va->invalids);
		}

		list_del(&tmp->list);
		list_add(&tmp->list, &removed);

		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
	}

	/* And free them up */
	list_for_each_entry_safe(tmp, next, &removed, list) {
		amdgpu_vm_it_remove(tmp, &vm->va);
		list_del(&tmp->list);

		if (tmp->start < saddr)
			tmp->start = saddr;
		if (tmp->last > eaddr)
			tmp->last = eaddr;

		tmp->bo_va = NULL;
		list_add(&tmp->list, &vm->freed);
		trace_amdgpu_vm_bo_unmap(NULL, tmp);
	}

	/* Insert partial mapping before the range */
	if (!list_empty(&before->list)) {
		amdgpu_vm_it_insert(before, &vm->va);
		if (before->flags & AMDGPU_PTE_PRT)
			amdgpu_vm_prt_get(adev);
	} else {
		kfree(before);
	}

	/* Insert partial mapping after the range */
	if (!list_empty(&after->list)) {
		amdgpu_vm_it_insert(after, &vm->va);
		if (after->flags & AMDGPU_PTE_PRT)
			amdgpu_vm_prt_get(adev);
	} else {
		kfree(after);
	}

	return 0;
}

/**
 * amdgpu_vm_bo_lookup_mapping - find mapping by address
 *
 * @vm: the requested VM
 * @addr: the address
 *
 * Find a mapping by its address.
 *
 * Returns:
 * The amdgpu_bo_va_mapping matching @addr, or NULL.
 */
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
							 uint64_t addr)
{
	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
}

/**
 * amdgpu_vm_bo_trace_cs - trace all reserved mappings
 *
 * @vm: the requested vm
 * @ticket: CS ticket
 *
 * Trace all mappings of BOs reserved during a command submission.
 */
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
{
	struct amdgpu_bo_va_mapping *mapping;

	if (!trace_amdgpu_vm_bo_cs_enabled())
		return;

	for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
	     mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
		if (mapping->bo_va && mapping->bo_va->base.bo) {
			struct amdgpu_bo *bo;

			bo = mapping->bo_va->base.bo;
			if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket)
				continue;
		}

		trace_amdgpu_vm_bo_cs(mapping);
	}
}

/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->base.vm;

	list_del(&bo_va->base.bo_list);

	spin_lock(&vm->moved_lock);
	list_del(&bo_va->base.vm_status);
	spin_unlock(&vm->moved_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		mapping->bo_va = NULL;
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		list_add(&mapping->list, &vm->freed);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		amdgpu_vm_free_mapping(adev, vm, mapping,
				       bo_va->last_pt_update);
	}

	dma_fence_put(bo_va->last_pt_update);
	kfree(bo_va);
}
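/*
 * Worked example for amdgpu_vm_bo_clear_mappings() above (numbers are
 * illustrative): with an existing mapping covering GPU pages [0x100, 0x1ff],
 * clearing the range [0x140, 0x17f] moves the original mapping to vm->freed
 * trimmed to the cleared range and inserts two new mappings built from
 * "before" and "after":
 *
 *	before: start = 0x100, last = 0x13f, offset unchanged
 *	after:  start = 0x180, last = 0x1ff,
 *	        offset += (0x180 - 0x100) << PAGE_SHIFT
 *
 * so the remaining pieces still point at the right byte offset inside the BO.
 */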
/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 * @evicted: is the BO evicted
 *
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo, bool evicted)
{
	struct amdgpu_vm_bo_base *bo_base;

	/* shadow bo doesn't have bo base, its validation needs its parent */
	if (bo->parent && bo->parent->shadow == bo)
		bo = bo->parent;

	list_for_each_entry(bo_base, &bo->va, bo_list) {
		struct amdgpu_vm *vm = bo_base->vm;
		bool was_moved = bo_base->moved;

		bo_base->moved = true;
		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
			if (bo->tbo.type == ttm_bo_type_kernel)
				list_move(&bo_base->vm_status, &vm->evicted);
			else
				list_move_tail(&bo_base->vm_status,
					       &vm->evicted);
			continue;
		}

		if (was_moved)
			continue;

		if (bo->tbo.type == ttm_bo_type_kernel) {
			list_move(&bo_base->vm_status, &vm->relocated);
		} else {
			spin_lock(&bo_base->vm->moved_lock);
			list_move(&bo_base->vm_status, &vm->moved);
			spin_unlock(&bo_base->vm->moved_lock);
		}
	}
}

/**
 * amdgpu_vm_get_block_size - calculate VM page table size as power of two
 *
 * @vm_size: VM size
 *
 * Returns:
 * VM page table size as power of two
 */
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
	/* Total bits covered by PD + PTs */
	unsigned bits = ilog2(vm_size) + 18;

	/* Make sure the PD is 4K in size up to 8GB address space.
	 * Above that, split equally between PD and PTs.
	 */
	if (vm_size <= 8)
		return (bits - 9);
	else
		return ((bits + 3) / 2);
}
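/*
 * Worked example for the block size heuristic above (illustrative): a 64 GB
 * VM gives bits = ilog2(64) + 18 = 24 total address bits for PD + PTs.
 * Since 64 > 8 the split is (24 + 3) / 2 = 13, i.e. each page table block
 * covers 13 bits (32 MB of 4 KB GPU pages) and the remaining bits go to the
 * page directory. For an 8 GB VM, bits = 21 and the result is 21 - 9 = 12,
 * which keeps the PD at 512 entries (4 KB).
 */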
/**
 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
 *
 * @adev: amdgpu_device pointer
 * @min_vm_size: the minimum VM size in GB if it is set to auto
 * @fragment_size_default: Default PTE fragment size
 * @max_level: max VMPT level
 * @max_bits: max address space size in bits
 */
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
			   uint32_t fragment_size_default, unsigned max_level,
			   unsigned max_bits)
{
	unsigned int max_size = 1 << (max_bits - 30);
	unsigned int vm_size;
	uint64_t tmp;

	/* adjust vm size first */
	if (amdgpu_vm_size != -1) {
		vm_size = amdgpu_vm_size;
		if (vm_size > max_size) {
			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
				 amdgpu_vm_size, max_size);
			vm_size = max_size;
		}
	} else {
		struct sysinfo si;
		unsigned int phys_ram_gb;

		/* Optimal VM size depends on the amount of physical
		 * RAM available. Underlying requirements and
		 * assumptions:
		 *
		 *  - Need to map system memory and VRAM from all GPUs
		 *     - VRAM from other GPUs not known here
		 *     - Assume VRAM <= system memory
		 *  - On GFX8 and older, VM space can be segmented for
		 *    different MTYPEs
		 *  - Need to allow room for fragmentation, guard pages etc.
		 *
		 * This adds up to a rough guess of system memory x3.
		 * Round up to power of two to maximize the available
		 * VM size with the given page table size.
		 */
		si_meminfo(&si);
		phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
			       (1 << 30) - 1) >> 30;
		vm_size = roundup_pow_of_two(
			min(max(phys_ram_gb * 3, min_vm_size), max_size));
	}

	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;

	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
	if (amdgpu_vm_block_size != -1)
		tmp >>= amdgpu_vm_block_size - 9;
	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
	switch (adev->vm_manager.num_level) {
	case 3:
		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
		break;
	case 2:
		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
		break;
	case 1:
		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
		break;
	default:
		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
	}
	/* block size depends on vm size and hw setup */
	if (amdgpu_vm_block_size != -1)
		adev->vm_manager.block_size =
			min((unsigned)amdgpu_vm_block_size, max_bits
			    - AMDGPU_GPU_PAGE_SHIFT
			    - 9 * adev->vm_manager.num_level);
	else if (adev->vm_manager.num_level > 1)
		adev->vm_manager.block_size = 9;
	else
		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);

	if (amdgpu_vm_fragment_size == -1)
		adev->vm_manager.fragment_size = fragment_size_default;
	else
		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;

	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
		 vm_size, adev->vm_manager.num_level + 1,
		 adev->vm_manager.block_size,
		 adev->vm_manager.fragment_size);
}
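/*
 * Worked example for the auto-sizing above (illustrative): on a host with
 * 16 GB of RAM and min_vm_size = 64, phys_ram_gb * 3 = 48 is below the
 * minimum, so vm_size becomes roundup_pow_of_two(64) = 64 GB. With 64 GB of
 * RAM the guess is 192 GB, rounded up to 256 GB (and capped at max_size if
 * the address space is smaller). max_pfn is then vm_size << 18, i.e. the
 * number of 4 KB GPU pages in the chosen address space.
 */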
/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @vm_context: Indicates if it is a GFX or Compute context
 * @pasid: Process address space identifier
 *
 * Init @vm fields.
 *
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		   int vm_context, unsigned int pasid)
{
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *root;
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
				   AMDGPU_VM_PTE_COUNT(adev) * 8);
	unsigned ring_instance;
	struct amdgpu_ring *ring;
	struct drm_sched_rq *rq;
	unsigned long size;
	uint64_t flags;
	int r, i;

	vm->va = RB_ROOT_CACHED;
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		vm->reserved_vmid[i] = NULL;
	INIT_LIST_HEAD(&vm->evicted);
	INIT_LIST_HEAD(&vm->relocated);
	spin_lock_init(&vm->moved_lock);
	INIT_LIST_HEAD(&vm->moved);
	INIT_LIST_HEAD(&vm->idle);
	INIT_LIST_HEAD(&vm->freed);

	/* create scheduler entity for page table updates */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
	if (r)
		return r;

	vm->pte_support_ats = false;

	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_COMPUTE);

		if (adev->asic_type == CHIP_RAVEN)
			vm->pte_support_ats = true;
	} else {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_GFX);
	}
	DRM_DEBUG_DRIVER("VM update mode is %s\n",
			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
		  "CPU update of VM recommended only for large BAR system\n");
	vm->last_update = NULL;

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
		flags |= AMDGPU_GEM_CREATE_SHADOW;

	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = align;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = flags;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	r = amdgpu_bo_create(adev, &bp, &root);
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(root, true);
	if (r)
		goto error_free_root;

	r = amdgpu_vm_clear_bo(adev, vm, root,
			       adev->vm_manager.root_level,
			       vm->pte_support_ats);
	if (r)
		goto error_unreserve;

	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
	amdgpu_bo_unreserve(vm->root.base.bo);

	if (pasid) {
		unsigned long flags;

		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
			      GFP_ATOMIC);
		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
		if (r < 0)
			goto error_free_root;

		vm->pasid = pasid;
	}

	INIT_KFIFO(vm->faults);
	vm->fault_credit = 16;

	return 0;

error_unreserve:
	amdgpu_bo_unreserve(vm->root.base.bo);

error_free_root:
	amdgpu_bo_unref(&vm->root.base.bo->shadow);
	amdgpu_bo_unref(&vm->root.base.bo);
	vm->root.base.bo = NULL;

error_free_sched_entity:
	drm_sched_entity_destroy(&vm->entity);

	return r;
}
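/*
 * Usage sketch (illustrative, error handling trimmed): roughly how the
 * per-file VM is brought up and torn down. The AMDGPU_VM_CONTEXT_GFX value
 * and the pasid variable are assumptions here; the real caller is the driver
 * open path, which also allocates the PASID.
 *
 *	struct amdgpu_vm *vm = &fpriv->vm;
 *	int r;
 *
 *	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, pasid);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_vm_fini(adev, vm);
 */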
/**
 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * This only works on GFX VMs that don't have any BOs added and no
 * page tables allocated yet.
 *
 * Changes the following VM parameters:
 * - use_cpu_for_update
 * - pte_support_ats
 * - pasid (old PASID is released, because compute manages its own PASIDs)
 *
 * Reinitializes the page directory to reflect the changed ATS
 * setting.
 *
 * Returns:
 * 0 for success, -errno for errors.
 */
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
	int r;

	r = amdgpu_bo_reserve(vm->root.base.bo, true);
	if (r)
		return r;

	/* Sanity checks */
	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
		r = -EINVAL;
		goto error;
	}

	/* Check if PD needs to be reinitialized and do it before
	 * changing any other state, in case it fails.
	 */
	if (pte_support_ats != vm->pte_support_ats) {
		r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
				       adev->vm_manager.root_level,
				       pte_support_ats);
		if (r)
			goto error;
	}

	/* Update VM state */
	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
	vm->pte_support_ats = pte_support_ats;
	DRM_DEBUG_DRIVER("VM update mode is %s\n",
			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
		  "CPU update of VM recommended only for large BAR system\n");

	if (vm->pasid) {
		unsigned long flags;

		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);

		vm->pasid = 0;
	}

	/* Free the shadow bo for compute VM */
	amdgpu_bo_unref(&vm->root.base.bo->shadow);

error:
	amdgpu_bo_unreserve(vm->root.base.bo);
	return r;
}

/**
 * amdgpu_vm_free_levels - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @parent: PD/PT starting level to free
 * @level: level of parent structure
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt *parent,
				  unsigned level)
{
	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);

	if (parent->base.bo) {
		list_del(&parent->base.bo_list);
		list_del(&parent->base.vm_status);
		amdgpu_bo_unref(&parent->base.bo->shadow);
		amdgpu_bo_unref(&parent->base.bo);
	}

	if (parent->entries)
		for (i = 0; i < num_entries; i++)
			amdgpu_vm_free_levels(adev, &parent->entries[i],
					      level + 1);

	kvfree(parent->entries);
}
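/*
 * Usage sketch for amdgpu_vm_make_compute() (illustrative): a caller such as
 * the amdkfd acquire path takes a freshly initialized GFX VM that has no
 * mappings yet and converts it before any page tables are built. The
 * function reserves the root PD itself, so the caller only checks the result:
 *
 *	r = amdgpu_vm_make_compute(adev, &fpriv->vm);
 *	if (r)
 *		return r;	// VM already has BOs/PTs, or PD clear failed
 */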
/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm.
 * Unbind the VM and remove all bos from the vm bo list.
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
	struct amdgpu_bo *root;
	u64 fault;
	int i, r;

	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);

	/* Clear pending page faults from IH when the VM is destroyed */
	while (kfifo_get(&vm->faults, &fault))
		amdgpu_ih_clear_fault(adev, fault);

	if (vm->pasid) {
		unsigned long flags;

		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
	}

	drm_sched_entity_destroy(&vm->entity);

	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
		dev_err(adev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(mapping, tmp,
					     &vm->va.rb_root, rb) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
			amdgpu_vm_prt_fini(adev, vm);
			prt_fini_needed = false;
		}

		list_del(&mapping->list);
		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
	}

	root = amdgpu_bo_ref(vm->root.base.bo);
	r = amdgpu_bo_reserve(root, true);
	if (r) {
		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
	} else {
		amdgpu_vm_free_levels(adev, &vm->root,
				      adev->vm_manager.root_level);
		amdgpu_bo_unreserve(root);
	}
	amdgpu_bo_unref(&root);
	dma_fence_put(vm->last_update);
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		amdgpu_vmid_free_reserved(adev, vm, i);
}

/**
 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
 *
 * @adev: amdgpu_device pointer
 * @pasid: PASID to identify the VM
 *
 * This function is expected to be called in interrupt context.
 *
 * Returns:
 * True if there was fault credit, false otherwise
 */
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
				  unsigned int pasid)
{
	struct amdgpu_vm *vm;

	spin_lock(&adev->vm_manager.pasid_lock);
	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
	if (!vm) {
		/* VM not found, can't track fault credit */
		spin_unlock(&adev->vm_manager.pasid_lock);
		return true;
	}

	/* No lock needed. only accessed by IRQ handler */
	if (!vm->fault_credit) {
		/* Too many faults in this VM */
		spin_unlock(&adev->vm_manager.pasid_lock);
		return false;
	}

	vm->fault_credit--;
	spin_unlock(&adev->vm_manager.pasid_lock);
	return true;
}

/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
	unsigned i;

	amdgpu_vmid_mgr_init(adev);

	adev->vm_manager.fence_context =
		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		adev->vm_manager.seqno[i] = 0;

	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
	spin_lock_init(&adev->vm_manager.prt_lock);
	atomic_set(&adev->vm_manager.num_prt_users, 0);

	/* If not overridden by the user, by default, only in large BAR systems
	 * Compute VM tables will be updated by CPU
	 */
#ifdef CONFIG_X86_64
	if (amdgpu_vm_update_mode == -1) {
		if (amdgpu_gmc_vram_full_visible(&adev->gmc))
			adev->vm_manager.vm_update_mode =
				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
		else
			adev->vm_manager.vm_update_mode = 0;
	} else
		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
#else
	adev->vm_manager.vm_update_mode = 0;
#endif

	idr_init(&adev->vm_manager.pasid_idr);
	spin_lock_init(&adev->vm_manager.pasid_lock);
}

/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
	idr_destroy(&adev->vm_manager.pasid_idr);

	amdgpu_vmid_mgr_fini(adev);
}

/**
 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
 *
 * @dev: drm device pointer
 * @data: drm_amdgpu_vm
 * @filp: drm file pointer
 *
 * Returns:
 * 0 for success, -errno for errors.
 */
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	union drm_amdgpu_vm *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	int r;

	switch (args->in.op) {
	case AMDGPU_VM_OP_RESERVE_VMID:
		/* currently, we only have the requirement to reserve a vmid from the gfxhub */
		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
		if (r)
			return r;
		break;
	case AMDGPU_VM_OP_UNRESERVE_VMID:
		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
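/*
 * How userspace reaches the ioctl above (sketch, assuming the libdrm amdgpu
 * wrappers of the same era; not code that belongs in this file):
 *
 *	// reserve a dedicated VMID for this process on the GFX hub
 *	r = amdgpu_vm_reserve_vmid(dev_handle, 0);
 *	...
 *	r = amdgpu_vm_unreserve_vmid(dev_handle, 0);
 *
 * Both wrappers fill a union drm_amdgpu_vm with AMDGPU_VM_OP_RESERVE_VMID or
 * AMDGPU_VM_OP_UNRESERVE_VMID and issue the VM ioctl, which lands in
 * amdgpu_vm_ioctl().
 */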
/**
 * amdgpu_vm_get_task_info - Extracts task info for a PASID.
 *
 * @adev: amdgpu_device pointer
 * @pasid: PASID identifier for VM
 * @task_info: task_info to fill.
 */
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
			     struct amdgpu_task_info *task_info)
{
	struct amdgpu_vm *vm;
	unsigned long flags;

	spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);

	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
	if (vm)
		*task_info = vm->task_info;

	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}

/**
 * amdgpu_vm_set_task_info - Sets the VM's task info.
 *
 * @vm: vm for which to set the info
 */
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
{
	if (!vm->task_info.pid) {
		vm->task_info.pid = current->pid;
		get_task_comm(vm->task_info.task_name, current);

		if (current->group_leader->mm == current->mm) {
			vm->task_info.tgid = current->group_leader->pid;
			get_task_comm(vm->task_info.process_name, current->group_leader);
		}
	}
}
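/*
 * Usage sketch (illustrative): amdgpu_vm_set_task_info() is called from the
 * submission path so the submitting process is remembered, and a VM-fault
 * interrupt handler can later resolve the offending PASID back to that
 * process when printing diagnostics. Assumes a pasid decoded from the IH
 * ring entry.
 *
 *	struct amdgpu_task_info task_info = {};
 *
 *	amdgpu_vm_get_task_info(adev, pasid, &task_info);
 *	DRM_ERROR("VM fault in process %s pid %d thread %s pid %d\n",
 *		  task_info.process_name, task_info.tgid,
 *		  task_info.task_name, task_info.pid);
 */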