1 /* $NetBSD: amdgpu_ttm.c,v 1.10 2021/12/19 12:31:45 riastradh Exp $ */ 2 3 /* 4 * Copyright 2009 Jerome Glisse. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * The above copyright notice and this permission notice (including the 24 * next paragraph) shall be included in all copies or substantial portions 25 * of the Software. 26 * 27 */ 28 /* 29 * Authors: 30 * Jerome Glisse <glisse@freedesktop.org> 31 * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> 32 * Dave Airlie 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: amdgpu_ttm.c,v 1.10 2021/12/19 12:31:45 riastradh Exp $"); 37 38 #include <linux/dma-mapping.h> 39 #include <linux/iommu.h> 40 #include <linux/hmm.h> 41 #include <linux/pagemap.h> 42 #include <linux/sched/task.h> 43 #include <linux/sched/mm.h> 44 #include <linux/seq_file.h> 45 #include <linux/slab.h> 46 #include <linux/swap.h> 47 #include <linux/swiotlb.h> 48 #include <linux/dma-buf.h> 49 #include <linux/sizes.h> 50 51 #include <drm/ttm/ttm_bo_api.h> 52 #include <drm/ttm/ttm_bo_driver.h> 53 #include <drm/ttm/ttm_placement.h> 54 #include <drm/ttm/ttm_module.h> 55 #include <drm/ttm/ttm_page_alloc.h> 56 57 #include <drm/drm_debugfs.h> 58 #include <drm/amdgpu_drm.h> 59 60 #include "amdgpu.h" 61 #include "amdgpu_object.h" 62 #include "amdgpu_trace.h" 63 #include "amdgpu_amdkfd.h" 64 #include "amdgpu_sdma.h" 65 #include "amdgpu_ras.h" 66 #include "bif/bif_4_1_d.h" 67 68 #include <linux/nbsd-namespace.h> 69 70 static int amdgpu_map_buffer(struct ttm_buffer_object *bo, 71 struct ttm_mem_reg *mem, unsigned num_pages, 72 uint64_t offset, unsigned window, 73 struct amdgpu_ring *ring, 74 uint64_t *addr); 75 76 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); 77 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); 78 79 static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) 80 { 81 return 0; 82 } 83 84 /** 85 * amdgpu_init_mem_type - Initialize a memory manager for a specific type of 86 * memory request. 87 * 88 * @bdev: The TTM BO device object (contains a reference to amdgpu_device) 89 * @type: The type of memory requested 90 * @man: The memory type manager for each domain 91 * 92 * This is called by ttm_bo_init_mm() when a buffer object is being 93 * initialized. 
94 */ 95 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, 96 struct ttm_mem_type_manager *man) 97 { 98 struct amdgpu_device *adev; 99 100 adev = amdgpu_ttm_adev(bdev); 101 102 switch (type) { 103 case TTM_PL_SYSTEM: 104 /* System memory */ 105 man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; 106 man->available_caching = TTM_PL_MASK_CACHING; 107 man->default_caching = TTM_PL_FLAG_CACHED; 108 break; 109 case TTM_PL_TT: 110 /* GTT memory */ 111 man->func = &amdgpu_gtt_mgr_func; 112 man->gpu_offset = adev->gmc.gart_start; 113 man->available_caching = TTM_PL_MASK_CACHING; 114 man->default_caching = TTM_PL_FLAG_CACHED; 115 man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; 116 break; 117 case TTM_PL_VRAM: 118 /* "On-card" video ram */ 119 man->func = &amdgpu_vram_mgr_func; 120 man->gpu_offset = adev->gmc.vram_start; 121 man->flags = TTM_MEMTYPE_FLAG_FIXED | 122 TTM_MEMTYPE_FLAG_MAPPABLE; 123 man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; 124 man->default_caching = TTM_PL_FLAG_WC; 125 break; 126 case AMDGPU_PL_GDS: 127 case AMDGPU_PL_GWS: 128 case AMDGPU_PL_OA: 129 /* On-chip GDS memory*/ 130 man->func = &ttm_bo_manager_func; 131 man->gpu_offset = 0; 132 man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA; 133 man->available_caching = TTM_PL_FLAG_UNCACHED; 134 man->default_caching = TTM_PL_FLAG_UNCACHED; 135 break; 136 default: 137 DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); 138 return -EINVAL; 139 } 140 return 0; 141 } 142 143 /** 144 * amdgpu_evict_flags - Compute placement flags 145 * 146 * @bo: The buffer object to evict 147 * @placement: Possible destination(s) for evicted BO 148 * 149 * Fill in placement data when ttm_bo_evict() is called 150 */ 151 static void amdgpu_evict_flags(struct ttm_buffer_object *bo, 152 struct ttm_placement *placement) 153 { 154 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 155 struct amdgpu_bo *abo; 156 static const struct ttm_place placements = { 157 .fpfn = 0, 158 .lpfn = 0, 159 .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM 160 }; 161 162 /* Don't handle scatter gather BOs */ 163 if (bo->type == ttm_bo_type_sg) { 164 placement->num_placement = 0; 165 placement->num_busy_placement = 0; 166 return; 167 } 168 169 /* Object isn't an AMDGPU object so ignore */ 170 if (!amdgpu_bo_is_amdgpu_bo(bo)) { 171 placement->placement = &placements; 172 placement->busy_placement = &placements; 173 placement->num_placement = 1; 174 placement->num_busy_placement = 1; 175 return; 176 } 177 178 abo = ttm_to_amdgpu_bo(bo); 179 switch (bo->mem.mem_type) { 180 case AMDGPU_PL_GDS: 181 case AMDGPU_PL_GWS: 182 case AMDGPU_PL_OA: 183 placement->num_placement = 0; 184 placement->num_busy_placement = 0; 185 return; 186 187 case TTM_PL_VRAM: 188 if (!adev->mman.buffer_funcs_enabled) { 189 /* Move to system memory */ 190 amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 191 } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && 192 !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && 193 amdgpu_bo_in_cpu_visible_vram(abo)) { 194 195 /* Try evicting to the CPU inaccessible part of VRAM 196 * first, but only set GTT as busy placement, so this 197 * BO will be evicted to GTT rather than causing other 198 * BOs to be evicted from VRAM 199 */ 200 amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | 201 AMDGPU_GEM_DOMAIN_GTT); 202 abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; 203 abo->placements[0].lpfn = 0; 204 abo->placement.busy_placement = &abo->placements[1]; 205 
abo->placement.num_busy_placement = 1; 206 } else { 207 /* Move to GTT memory */ 208 amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); 209 } 210 break; 211 case TTM_PL_TT: 212 default: 213 amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); 214 break; 215 } 216 *placement = abo->placement; 217 } 218 219 /** 220 * amdgpu_verify_access - Verify access for a mmap call 221 * 222 * @bo: The buffer object to map 223 * @filp: The file pointer from the process performing the mmap 224 * 225 * This is called by ttm_bo_mmap() to verify whether a process 226 * has the right to mmap a BO to their process space. 227 */ 228 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) 229 { 230 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 231 232 /* 233 * Don't verify access for KFD BOs. They don't have a GEM 234 * object associated with them. 235 */ 236 if (abo->kfd_bo) 237 return 0; 238 239 if (amdgpu_ttm_tt_get_usermm(bo->ttm)) 240 return -EPERM; 241 #ifdef __NetBSD__ 242 return drm_vma_node_verify_access(&abo->tbo.base.vma_node, 243 filp->f_data); 244 #else 245 return drm_vma_node_verify_access(&abo->tbo.base.vma_node, 246 filp->private_data); 247 #endif 248 } 249 250 /** 251 * amdgpu_move_null - Register memory for a buffer object 252 * 253 * @bo: The bo to assign the memory to 254 * @new_mem: The memory to be assigned. 255 * 256 * Assign the memory from new_mem to the memory of the buffer object bo. 257 */ 258 static void amdgpu_move_null(struct ttm_buffer_object *bo, 259 struct ttm_mem_reg *new_mem) 260 { 261 struct ttm_mem_reg *old_mem = &bo->mem; 262 263 BUG_ON(old_mem->mm_node != NULL); 264 *old_mem = *new_mem; 265 new_mem->mm_node = NULL; 266 } 267 268 /** 269 * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer. 270 * 271 * @bo: The bo to assign the memory to. 272 * @mm_node: Memory manager node for drm allocator. 273 * @mem: The region where the bo resides. 274 * 275 */ 276 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, 277 struct drm_mm_node *mm_node, 278 struct ttm_mem_reg *mem) 279 { 280 uint64_t addr = 0; 281 282 if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) { 283 addr = mm_node->start << PAGE_SHIFT; 284 addr += bo->bdev->man[mem->mem_type].gpu_offset; 285 } 286 return addr; 287 } 288 289 /** 290 * amdgpu_find_mm_node - Helper function finds the drm_mm_node corresponding to 291 * @offset. It also modifies the offset to be within the drm_mm_node returned 292 * 293 * @mem: The region where the bo resides. 294 * @offset: The offset that drm_mm_node is used for finding. 295 * 296 */ 297 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, 298 unsigned long *offset) 299 { 300 struct drm_mm_node *mm_node = mem->mm_node; 301 302 while (*offset >= (mm_node->size << PAGE_SHIFT)) { 303 *offset -= (mm_node->size << PAGE_SHIFT); 304 ++mm_node; 305 } 306 return mm_node; 307 } 308 309 /** 310 * amdgpu_copy_ttm_mem_to_mem - Helper function for copy 311 * 312 * The function copies @size bytes from {src->mem + src->offset} to 313 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a 314 * move and different for a BO to BO copy. 315 * 316 * @f: Returns the last fence if multiple jobs are submitted. 
317 */ 318 int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, 319 struct amdgpu_copy_mem *src, 320 struct amdgpu_copy_mem *dst, 321 uint64_t size, 322 struct dma_resv *resv, 323 struct dma_fence **f) 324 { 325 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 326 struct drm_mm_node *src_mm, *dst_mm; 327 uint64_t src_node_start, dst_node_start, src_node_size, 328 dst_node_size, src_page_offset, dst_page_offset; 329 struct dma_fence *fence = NULL; 330 int r = 0; 331 const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * 332 AMDGPU_GPU_PAGE_SIZE); 333 334 if (!adev->mman.buffer_funcs_enabled) { 335 DRM_ERROR("Trying to move memory with ring turned off.\n"); 336 return -EINVAL; 337 } 338 339 src_mm = amdgpu_find_mm_node(src->mem, &src->offset); 340 src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + 341 src->offset; 342 src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; 343 src_page_offset = src_node_start & (PAGE_SIZE - 1); 344 345 dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); 346 dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + 347 dst->offset; 348 dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; 349 dst_page_offset = dst_node_start & (PAGE_SIZE - 1); 350 351 mutex_lock(&adev->mman.gtt_window_lock); 352 353 while (size) { 354 unsigned long cur_size; 355 uint64_t from = src_node_start, to = dst_node_start; 356 struct dma_fence *next; 357 358 /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst 359 * begins at an offset, then adjust the size accordingly 360 */ 361 cur_size = min3(min(src_node_size, dst_node_size), size, 362 GTT_MAX_BYTES); 363 if (cur_size + src_page_offset > GTT_MAX_BYTES || 364 cur_size + dst_page_offset > GTT_MAX_BYTES) 365 cur_size -= max(src_page_offset, dst_page_offset); 366 367 /* Map only what needs to be accessed. 
Map src to window 0 and 368 * dst to window 1 369 */ 370 if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { 371 r = amdgpu_map_buffer(src->bo, src->mem, 372 PFN_UP(cur_size + src_page_offset), 373 src_node_start, 0, ring, 374 &from); 375 if (r) 376 goto error; 377 /* Adjust the offset because amdgpu_map_buffer returns 378 * start of mapped page 379 */ 380 from += src_page_offset; 381 } 382 383 if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { 384 r = amdgpu_map_buffer(dst->bo, dst->mem, 385 PFN_UP(cur_size + dst_page_offset), 386 dst_node_start, 1, ring, 387 &to); 388 if (r) 389 goto error; 390 to += dst_page_offset; 391 } 392 393 r = amdgpu_copy_buffer(ring, from, to, cur_size, 394 resv, &next, false, true); 395 if (r) 396 goto error; 397 398 dma_fence_put(fence); 399 fence = next; 400 401 size -= cur_size; 402 if (!size) 403 break; 404 405 src_node_size -= cur_size; 406 if (!src_node_size) { 407 src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, 408 src->mem); 409 src_node_size = (src_mm->size << PAGE_SHIFT); 410 src_page_offset = 0; 411 } else { 412 src_node_start += cur_size; 413 src_page_offset = src_node_start & (PAGE_SIZE - 1); 414 } 415 dst_node_size -= cur_size; 416 if (!dst_node_size) { 417 dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, 418 dst->mem); 419 dst_node_size = (dst_mm->size << PAGE_SHIFT); 420 dst_page_offset = 0; 421 } else { 422 dst_node_start += cur_size; 423 dst_page_offset = dst_node_start & (PAGE_SIZE - 1); 424 } 425 } 426 error: 427 mutex_unlock(&adev->mman.gtt_window_lock); 428 if (f) 429 *f = dma_fence_get(fence); 430 dma_fence_put(fence); 431 return r; 432 } 433 434 /** 435 * amdgpu_move_blit - Copy an entire buffer to another buffer 436 * 437 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to 438 * help move buffers to and from VRAM. 439 */ 440 static int amdgpu_move_blit(struct ttm_buffer_object *bo, 441 bool evict, bool no_wait_gpu, 442 struct ttm_mem_reg *new_mem, 443 struct ttm_mem_reg *old_mem) 444 { 445 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 446 struct amdgpu_copy_mem src, dst; 447 struct dma_fence *fence = NULL; 448 int r; 449 450 src.bo = bo; 451 dst.bo = bo; 452 src.mem = old_mem; 453 dst.mem = new_mem; 454 src.offset = 0; 455 dst.offset = 0; 456 457 r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, 458 new_mem->num_pages << PAGE_SHIFT, 459 bo->base.resv, &fence); 460 if (r) 461 goto error; 462 463 /* clear the space being freed */ 464 if (old_mem->mem_type == TTM_PL_VRAM && 465 (ttm_to_amdgpu_bo(bo)->flags & 466 AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { 467 struct dma_fence *wipe_fence = NULL; 468 469 r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, 470 NULL, &wipe_fence); 471 if (r) { 472 goto error; 473 } else if (wipe_fence) { 474 dma_fence_put(fence); 475 fence = wipe_fence; 476 } 477 } 478 479 /* Always block for VM page tables before committing the new location */ 480 if (bo->type == ttm_bo_type_kernel) 481 r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); 482 else 483 r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); 484 dma_fence_put(fence); 485 return r; 486 487 error: 488 if (fence) 489 dma_fence_wait(fence, false); 490 dma_fence_put(fence); 491 return r; 492 } 493 494 /** 495 * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer 496 * 497 * Called by amdgpu_bo_move(). 
498 */ 499 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, 500 struct ttm_operation_ctx *ctx, 501 struct ttm_mem_reg *new_mem) 502 { 503 struct ttm_mem_reg *old_mem = &bo->mem; 504 struct ttm_mem_reg tmp_mem; 505 struct ttm_place placements; 506 struct ttm_placement placement; 507 int r; 508 509 /* create space/pages for new_mem in GTT space */ 510 tmp_mem = *new_mem; 511 tmp_mem.mm_node = NULL; 512 placement.num_placement = 1; 513 placement.placement = &placements; 514 placement.num_busy_placement = 1; 515 placement.busy_placement = &placements; 516 placements.fpfn = 0; 517 placements.lpfn = 0; 518 placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; 519 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); 520 if (unlikely(r)) { 521 pr_err("Failed to find GTT space for blit from VRAM\n"); 522 return r; 523 } 524 525 /* set caching flags */ 526 r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); 527 if (unlikely(r)) { 528 goto out_cleanup; 529 } 530 531 /* Bind the memory to the GTT space */ 532 r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); 533 if (unlikely(r)) { 534 goto out_cleanup; 535 } 536 537 /* blit VRAM to GTT */ 538 r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem); 539 if (unlikely(r)) { 540 goto out_cleanup; 541 } 542 543 /* move BO (in tmp_mem) to new_mem */ 544 r = ttm_bo_move_ttm(bo, ctx, new_mem); 545 out_cleanup: 546 ttm_bo_mem_put(bo, &tmp_mem); 547 return r; 548 } 549 550 /** 551 * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM 552 * 553 * Called by amdgpu_bo_move(). 554 */ 555 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, 556 struct ttm_operation_ctx *ctx, 557 struct ttm_mem_reg *new_mem) 558 { 559 struct ttm_mem_reg *old_mem = &bo->mem; 560 struct ttm_mem_reg tmp_mem; 561 struct ttm_placement placement; 562 struct ttm_place placements; 563 int r; 564 565 /* make space in GTT for old_mem buffer */ 566 tmp_mem = *new_mem; 567 tmp_mem.mm_node = NULL; 568 placement.num_placement = 1; 569 placement.placement = &placements; 570 placement.num_busy_placement = 1; 571 placement.busy_placement = &placements; 572 placements.fpfn = 0; 573 placements.lpfn = 0; 574 placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; 575 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); 576 if (unlikely(r)) { 577 pr_err("Failed to find GTT space for blit to VRAM\n"); 578 return r; 579 } 580 581 /* move/bind old memory to GTT space */ 582 r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); 583 if (unlikely(r)) { 584 goto out_cleanup; 585 } 586 587 /* copy to VRAM */ 588 r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem); 589 if (unlikely(r)) { 590 goto out_cleanup; 591 } 592 out_cleanup: 593 ttm_bo_mem_put(bo, &tmp_mem); 594 return r; 595 } 596 597 /** 598 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy 599 * 600 * Called by amdgpu_bo_move() 601 */ 602 static bool amdgpu_mem_visible(struct amdgpu_device *adev, 603 struct ttm_mem_reg *mem) 604 { 605 struct drm_mm_node *nodes = mem->mm_node; 606 607 if (mem->mem_type == TTM_PL_SYSTEM || 608 mem->mem_type == TTM_PL_TT) 609 return true; 610 if (mem->mem_type != TTM_PL_VRAM) 611 return false; 612 613 /* ttm_mem_reg_ioremap only supports contiguous memory */ 614 if (nodes->size != mem->num_pages) 615 return false; 616 617 return ((nodes->start + nodes->size) << PAGE_SHIFT) 618 <= adev->gmc.visible_vram_size; 619 } 620 621 /** 622 * amdgpu_bo_move - Move a buffer object to a new memory location 623 * 624 * Called by 
ttm_bo_handle_move_mem() 625 */ 626 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, 627 struct ttm_operation_ctx *ctx, 628 struct ttm_mem_reg *new_mem) 629 { 630 struct amdgpu_device *adev; 631 struct amdgpu_bo *abo; 632 struct ttm_mem_reg *old_mem = &bo->mem; 633 int r; 634 635 /* Can't move a pinned BO */ 636 abo = ttm_to_amdgpu_bo(bo); 637 if (WARN_ON_ONCE(abo->pin_count > 0)) 638 return -EINVAL; 639 640 adev = amdgpu_ttm_adev(bo->bdev); 641 642 if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { 643 amdgpu_move_null(bo, new_mem); 644 return 0; 645 } 646 if ((old_mem->mem_type == TTM_PL_TT && 647 new_mem->mem_type == TTM_PL_SYSTEM) || 648 (old_mem->mem_type == TTM_PL_SYSTEM && 649 new_mem->mem_type == TTM_PL_TT)) { 650 /* bind is enough */ 651 amdgpu_move_null(bo, new_mem); 652 return 0; 653 } 654 if (old_mem->mem_type == AMDGPU_PL_GDS || 655 old_mem->mem_type == AMDGPU_PL_GWS || 656 old_mem->mem_type == AMDGPU_PL_OA || 657 new_mem->mem_type == AMDGPU_PL_GDS || 658 new_mem->mem_type == AMDGPU_PL_GWS || 659 new_mem->mem_type == AMDGPU_PL_OA) { 660 /* Nothing to save here */ 661 amdgpu_move_null(bo, new_mem); 662 return 0; 663 } 664 665 if (!adev->mman.buffer_funcs_enabled) { 666 r = -ENODEV; 667 goto memcpy; 668 } 669 670 if (old_mem->mem_type == TTM_PL_VRAM && 671 new_mem->mem_type == TTM_PL_SYSTEM) { 672 r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); 673 } else if (old_mem->mem_type == TTM_PL_SYSTEM && 674 new_mem->mem_type == TTM_PL_VRAM) { 675 r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); 676 } else { 677 r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, 678 new_mem, old_mem); 679 } 680 681 if (r) { 682 memcpy: 683 /* Check that all memory is CPU accessible */ 684 if (!amdgpu_mem_visible(adev, old_mem) || 685 !amdgpu_mem_visible(adev, new_mem)) { 686 pr_err("Move buffer fallback to memcpy unavailable\n"); 687 return r; 688 } 689 690 r = ttm_bo_move_memcpy(bo, ctx, new_mem); 691 if (r) 692 return r; 693 } 694 695 if (bo->type == ttm_bo_type_device && 696 new_mem->mem_type == TTM_PL_VRAM && 697 old_mem->mem_type != TTM_PL_VRAM) { 698 /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU 699 * accesses the BO after it's moved. 700 */ 701 abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 702 } 703 704 /* update statistics */ 705 atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); 706 return 0; 707 } 708 709 /** 710 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault 711 * 712 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() 713 */ 714 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) 715 { 716 struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; 717 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); 718 struct drm_mm_node *mm_node = mem->mm_node; 719 720 mem->bus.addr = NULL; 721 mem->bus.offset = 0; 722 mem->bus.size = mem->num_pages << PAGE_SHIFT; 723 mem->bus.base = 0; 724 mem->bus.is_iomem = false; 725 if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE)) 726 return -EINVAL; 727 switch (mem->mem_type) { 728 case TTM_PL_SYSTEM: 729 /* system memory */ 730 return 0; 731 case TTM_PL_TT: 732 break; 733 case TTM_PL_VRAM: 734 mem->bus.offset = mem->start << PAGE_SHIFT; 735 /* check if it's visible */ 736 if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) 737 return -EINVAL; 738 /* Only physically contiguous buffers apply. 
In a contiguous 739 * buffer, size of the first mm_node would match the number of 740 * pages in ttm_mem_reg. 741 */ 742 if (adev->mman.aper_base_kaddr && 743 (mm_node->size == mem->num_pages)) 744 mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + 745 mem->bus.offset; 746 747 mem->bus.base = adev->gmc.aper_base; 748 mem->bus.is_iomem = true; 749 break; 750 default: 751 return -EINVAL; 752 } 753 return 0; 754 } 755 756 static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) 757 { 758 } 759 760 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, 761 unsigned long page_offset) 762 { 763 struct drm_mm_node *mm; 764 unsigned long offset = (page_offset << PAGE_SHIFT); 765 766 mm = amdgpu_find_mm_node(&bo->mem, &offset); 767 return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + 768 (offset >> PAGE_SHIFT); 769 } 770 771 /* 772 * TTM backend functions. 773 */ 774 struct amdgpu_ttm_tt { 775 struct ttm_dma_tt ttm; 776 struct drm_gem_object *gobj; 777 u64 offset; 778 uint64_t userptr; 779 #ifdef __NetBSD__ 780 struct proc *usertask; 781 #else 782 struct task_struct *usertask; 783 #endif 784 uint32_t userflags; 785 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) 786 struct hmm_range *range; 787 #endif 788 }; 789 790 #ifdef CONFIG_DRM_AMDGPU_USERPTR 791 /* flags used by HMM internal, not related to CPU/GPU PTE flags */ 792 static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { 793 (1 << 0), /* HMM_PFN_VALID */ 794 (1 << 1), /* HMM_PFN_WRITE */ 795 0 /* HMM_PFN_DEVICE_PRIVATE */ 796 }; 797 798 static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { 799 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ 800 0, /* HMM_PFN_NONE */ 801 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ 802 }; 803 804 /** 805 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user 806 * memory and start HMM tracking CPU page table update 807 * 808 * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only 809 * once afterwards to stop HMM tracking 810 */ 811 int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) 812 { 813 struct ttm_tt *ttm = bo->tbo.ttm; 814 struct amdgpu_ttm_tt *gtt = (void *)ttm; 815 unsigned long start = gtt->userptr; 816 struct vm_area_struct *vma; 817 struct hmm_range *range; 818 unsigned long timeout; 819 struct mm_struct *mm; 820 unsigned long i; 821 int r = 0; 822 823 mm = bo->notifier.mm; 824 if (unlikely(!mm)) { 825 DRM_DEBUG_DRIVER("BO is not registered?\n"); 826 return -EFAULT; 827 } 828 829 /* Another get_user_pages is running at the same time?? 
*/ 830 if (WARN_ON(gtt->range)) 831 return -EFAULT; 832 833 if (!mmget_not_zero(mm)) /* Happens during process shutdown */ 834 return -ESRCH; 835 836 range = kzalloc(sizeof(*range), GFP_KERNEL); 837 if (unlikely(!range)) { 838 r = -ENOMEM; 839 goto out; 840 } 841 range->notifier = &bo->notifier; 842 range->flags = hmm_range_flags; 843 range->values = hmm_range_values; 844 range->pfn_shift = PAGE_SHIFT; 845 range->start = bo->notifier.interval_tree.start; 846 range->end = bo->notifier.interval_tree.last + 1; 847 range->default_flags = hmm_range_flags[HMM_PFN_VALID]; 848 if (!amdgpu_ttm_tt_is_readonly(ttm)) 849 range->default_flags |= range->flags[HMM_PFN_WRITE]; 850 851 range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns), 852 GFP_KERNEL); 853 if (unlikely(!range->pfns)) { 854 r = -ENOMEM; 855 goto out_free_ranges; 856 } 857 858 down_read(&mm->mmap_sem); 859 vma = find_vma(mm, start); 860 if (unlikely(!vma || start < vma->vm_start)) { 861 r = -EFAULT; 862 goto out_unlock; 863 } 864 if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && 865 vma->vm_file)) { 866 r = -EPERM; 867 goto out_unlock; 868 } 869 up_read(&mm->mmap_sem); 870 timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); 871 872 retry: 873 range->notifier_seq = mmu_interval_read_begin(&bo->notifier); 874 875 down_read(&mm->mmap_sem); 876 r = hmm_range_fault(range, 0); 877 up_read(&mm->mmap_sem); 878 if (unlikely(r <= 0)) { 879 /* 880 * FIXME: This timeout should encompass the retry from 881 * mmu_interval_read_retry() as well. 882 */ 883 if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout)) 884 goto retry; 885 goto out_free_pfns; 886 } 887 888 for (i = 0; i < ttm->num_pages; i++) { 889 /* FIXME: The pages cannot be touched outside the notifier_lock */ 890 pages[i] = hmm_device_entry_to_page(range, range->pfns[i]); 891 if (unlikely(!pages[i])) { 892 pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", 893 i, range->pfns[i]); 894 r = -ENOMEM; 895 896 goto out_free_pfns; 897 } 898 } 899 900 gtt->range = range; 901 mmput(mm); 902 903 return 0; 904 905 out_unlock: 906 up_read(&mm->mmap_sem); 907 out_free_pfns: 908 kvfree(range->pfns); 909 out_free_ranges: 910 kfree(range); 911 out: 912 mmput(mm); 913 return r; 914 } 915 916 /** 917 * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change 918 * Check if the pages backing this ttm range have been invalidated 919 * 920 * Returns: true if pages are still valid 921 */ 922 bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) 923 { 924 struct amdgpu_ttm_tt *gtt = (void *)ttm; 925 bool r = false; 926 927 if (!gtt || !gtt->userptr) 928 return false; 929 930 DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", 931 gtt->userptr, ttm->num_pages); 932 933 WARN_ONCE(!gtt->range || !gtt->range->pfns, 934 "No user pages to check\n"); 935 936 if (gtt->range) { 937 /* 938 * FIXME: Must always hold notifier_lock for this, and must 939 * not ignore the return code. 940 */ 941 r = mmu_interval_read_retry(gtt->range->notifier, 942 gtt->range->notifier_seq); 943 kvfree(gtt->range->pfns); 944 kfree(gtt->range); 945 gtt->range = NULL; 946 } 947 948 return !r; 949 } 950 #endif 951 952 /** 953 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. 954 * 955 * Called by amdgpu_cs_list_validate(). This creates the page list 956 * that backs user memory and will ultimately be mapped into the device 957 * address space. 
958 */ 959 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) 960 { 961 unsigned long i; 962 963 for (i = 0; i < ttm->num_pages; ++i) 964 ttm->pages[i] = pages ? pages[i] : NULL; 965 } 966 967 /** 968 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages 969 * 970 * Called by amdgpu_ttm_backend_bind() 971 **/ 972 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) 973 { 974 #ifdef __NetBSD__ /* XXX amdgpu userptr */ 975 return -ENODEV; 976 #else 977 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 978 struct amdgpu_ttm_tt *gtt = (void *)ttm; 979 unsigned nents; 980 int r; 981 982 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 983 enum dma_data_direction direction = write ? 984 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 985 986 /* Allocate an SG array and squash pages into it */ 987 r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, 988 ttm->num_pages << PAGE_SHIFT, 989 GFP_KERNEL); 990 if (r) 991 goto release_sg; 992 993 /* Map SG to device */ 994 r = -ENOMEM; 995 nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 996 if (nents != ttm->sg->nents) 997 goto release_sg; 998 999 /* convert SG to linear array of pages and dma addresses */ 1000 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, 1001 gtt->ttm.dma_address, ttm->num_pages); 1002 1003 return 0; 1004 1005 release_sg: 1006 kfree(ttm->sg); 1007 return r; 1008 #endif 1009 } 1010 1011 /** 1012 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages 1013 */ 1014 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) 1015 { 1016 #ifndef __NetBSD__ /* XXX amdgpu userptr */ 1017 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 1018 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1019 1020 int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); 1021 enum dma_data_direction direction = write ? 
1022 DMA_BIDIRECTIONAL : DMA_TO_DEVICE; 1023 1024 /* double check that we don't free the table twice */ 1025 if (!ttm->sg->sgl) 1026 return; 1027 1028 /* unmap the pages mapped to the device */ 1029 dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); 1030 1031 sg_free_table(ttm->sg); 1032 1033 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) 1034 if (gtt->range) { 1035 unsigned long i; 1036 1037 for (i = 0; i < ttm->num_pages; i++) { 1038 if (ttm->pages[i] != 1039 hmm_device_entry_to_page(gtt->range, 1040 gtt->range->pfns[i])) 1041 break; 1042 } 1043 1044 WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); 1045 } 1046 #endif 1047 #endif 1048 } 1049 1050 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, 1051 struct ttm_buffer_object *tbo, 1052 uint64_t flags) 1053 { 1054 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); 1055 struct ttm_tt *ttm = tbo->ttm; 1056 struct amdgpu_ttm_tt *gtt = (void *)ttm; 1057 int r; 1058 1059 if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { 1060 uint64_t page_idx = 1; 1061 1062 r = amdgpu_gart_bind(adev, gtt->offset, page_idx, 1063 ttm->pages, gtt->ttm.dma_address, flags); 1064 if (r) 1065 goto gart_bind_fail; 1066 1067 /* Patch mtype of the second part BO */ 1068 flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; 1069 flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); 1070 1071 r = amdgpu_gart_bind(adev, 1072 gtt->offset + (page_idx << PAGE_SHIFT), 1073 ttm->num_pages - page_idx, 1074 &ttm->pages[page_idx], 1075 &(gtt->ttm.dma_address[page_idx]), flags); 1076 } else { 1077 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, 1078 ttm->pages, gtt->ttm.dma_address, flags); 1079 } 1080 1081 gart_bind_fail: 1082 if (r) 1083 DRM_ERROR("failed to bind %lu pages at 0x%08"PRIX64"\n", 1084 ttm->num_pages, gtt->offset); 1085 1086 return r; 1087 } 1088 1089 /** 1090 * amdgpu_ttm_backend_bind - Bind GTT memory 1091 * 1092 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). 1093 * This handles binding GTT memory to the device address space. 
1094 */ 1095 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, 1096 struct ttm_mem_reg *bo_mem) 1097 { 1098 struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); 1099 struct amdgpu_ttm_tt *gtt = (void*)ttm; 1100 uint64_t flags; 1101 int r = 0; 1102 1103 if (gtt->userptr) { 1104 r = amdgpu_ttm_tt_pin_userptr(ttm); 1105 if (r) { 1106 DRM_ERROR("failed to pin userptr\n"); 1107 return r; 1108 } 1109 } 1110 if (!ttm->num_pages) { 1111 WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", 1112 ttm->num_pages, bo_mem, ttm); 1113 } 1114 1115 if (bo_mem->mem_type == AMDGPU_PL_GDS || 1116 bo_mem->mem_type == AMDGPU_PL_GWS || 1117 bo_mem->mem_type == AMDGPU_PL_OA) 1118 return -EINVAL; 1119 1120 if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { 1121 gtt->offset = AMDGPU_BO_INVALID_OFFSET; 1122 return 0; 1123 } 1124 1125 /* compute PTE flags relevant to this BO memory */ 1126 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); 1127 1128 /* bind pages into GART page tables */ 1129 gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; 1130 r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, 1131 ttm->pages, gtt->ttm.dma_address, flags); 1132 1133 if (r) 1134 DRM_ERROR("failed to bind %lu pages at 0x%08"PRIX64"\n", 1135 ttm->num_pages, gtt->offset); 1136 return r; 1137 } 1138 1139 /** 1140 * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object 1141 */ 1142 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) 1143 { 1144 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); 1145 struct ttm_operation_ctx ctx = { false, false }; 1146 struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; 1147 struct ttm_mem_reg tmp; 1148 struct ttm_placement placement; 1149 struct ttm_place placements; 1150 uint64_t addr, flags; 1151 int r; 1152 1153 if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) 1154 return 0; 1155 1156 addr = amdgpu_gmc_agp_addr(bo); 1157 if (addr != AMDGPU_BO_INVALID_OFFSET) { 1158 bo->mem.start = addr >> PAGE_SHIFT; 1159 } else { 1160 1161 /* allocate GART space */ 1162 tmp = bo->mem; 1163 tmp.mm_node = NULL; 1164 placement.num_placement = 1; 1165 placement.placement = &placements; 1166 placement.num_busy_placement = 1; 1167 placement.busy_placement = &placements; 1168 placements.fpfn = 0; 1169 placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; 1170 placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | 1171 TTM_PL_FLAG_TT; 1172 1173 r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); 1174 if (unlikely(r)) 1175 return r; 1176 1177 /* compute PTE flags for this buffer object */ 1178 flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); 1179 1180 /* Bind pages */ 1181 gtt->offset = (u64)tmp.start << PAGE_SHIFT; 1182 r = amdgpu_ttm_gart_bind(adev, bo, flags); 1183 if (unlikely(r)) { 1184 ttm_bo_mem_put(bo, &tmp); 1185 return r; 1186 } 1187 1188 ttm_bo_mem_put(bo, &bo->mem); 1189 bo->mem = tmp; 1190 } 1191 1192 bo->offset = (bo->mem.start << PAGE_SHIFT) + 1193 bo->bdev->man[bo->mem.mem_type].gpu_offset; 1194 1195 return 0; 1196 } 1197 1198 /** 1199 * amdgpu_ttm_recover_gart - Rebind GTT pages 1200 * 1201 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to 1202 * rebind GTT pages during a GPU reset. 
 */
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	uint64_t flags;
	int r;

	if (!tbo->ttm)
		return 0;

	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
	r = amdgpu_ttm_gart_bind(adev, tbo, flags);

	return r;
}

/**
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

	/* if the pages have userptr pinning then clear that first */
	if (gtt->userptr)
		amdgpu_ttm_tt_unpin_userptr(ttm);

	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
		return 0;

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
	if (r)
		DRM_ERROR("failed to unbind %lu pages at 0x%08"PRIX64"\n",
			  gtt->ttm.ttm.num_pages, gtt->offset);
	return r;
}

static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

#ifndef __NetBSD__ /* XXX amdgpu userptr */
	if (gtt->usertask)
		put_task_struct(gtt->usertask);
#endif

	ttm_dma_tt_fini(&gtt->ttm);
	kfree(gtt);
}

static struct ttm_backend_func amdgpu_backend_func = {
	.bind = &amdgpu_ttm_backend_bind,
	.unbind = &amdgpu_ttm_backend_unbind,
	.destroy = &amdgpu_ttm_backend_destroy,
};

/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
 *
 * Called by ttm_tt_create().
 */
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
{
	struct amdgpu_ttm_tt *gtt;

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL) {
		return NULL;
	}
	gtt->ttm.ttm.func = &amdgpu_backend_func;
	gtt->gobj = &bo->base;

	/* allocate space for the uninitialized page entries */
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
		kfree(gtt);
		return NULL;
	}
	return &gtt->ttm.ttm;
}

/**
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
			struct ttm_operation_ctx *ctx)
{
#ifndef __NetBSD__
	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
#endif
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
	if (gtt && gtt->userptr) {
#ifdef __NetBSD__
		ttm->sg = NULL;
#else
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;
#endif

		ttm->page_flags |= TTM_PAGE_FLAG_SG;
		ttm->state = tt_unbound;
		return 0;
	}

	if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

#ifdef __NetBSD__
		int r = drm_prime_bus_dmamap_load_sgt(ttm->bdev->dmat,
		    gtt->ttm.dma_address, ttm->sg);
		if (r)
			return r;
#else
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 gtt->ttm.dma_address,
						 ttm->num_pages);
#endif
		ttm->state = tt_unbound;
		return 0;
	}

#ifdef __NetBSD__
	/* XXX errno NetBSD->Linux */
	return ttm_bus_dma_populate(&gtt->ttm);
#else
#ifdef CONFIG_SWIOTLB
	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
		return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
	}
#endif

	/* fall back to generic helper to populate the page array
	 * and map them to the device */
	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
#endif
}

/**
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
#ifndef __NetBSD__
	struct amdgpu_device *adev;
#endif

	if (gtt && gtt->userptr) {
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
		kfree(ttm->sg);
		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
		return;
	}

	if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
		return;
	}

	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
		return;

#ifdef __NetBSD__
	ttm_bus_dma_unpopulate(&gtt->ttm);
	return;
#else
	adev = amdgpu_ttm_adev(ttm->bdev);

#ifdef CONFIG_SWIOTLB
	if (adev->need_swiotlb && swiotlb_nr_tbl()) {
		ttm_dma_unpopulate(&gtt->ttm, adev->dev);
		return;
	}
#endif

	/* fall back to generic helper to unmap and unpopulate array */
	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
#endif /* __NetBSD__ */
}

#ifdef __NetBSD__
static void amdgpu_ttm_tt_swapout(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = container_of(ttm, struct amdgpu_ttm_tt,
	    ttm.ttm);
	struct ttm_dma_tt *ttm_dma = &gtt->ttm;

	ttm_bus_dma_swapout(ttm_dma);
}

static const struct uvm_pagerops amdgpu_uvm_ops = {
	.pgo_reference = &ttm_bo_uvm_reference,
	.pgo_detach = &ttm_bo_uvm_detach,
	.pgo_fault = &ttm_bo_uvm_fault,
};
#endif

/**
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
 *
 * @ttm: The ttm_tt object to bind this userptr object to
 * @addr: The address in the current task's VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 * to the current task
 */
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
			      uint32_t flags)
{
#ifdef __NetBSD__ /* XXX amdgpu userptr */
	return -ENODEV;
#else
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return -EINVAL;

	gtt->userptr = addr;
	gtt->userflags = flags;

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	return 0;
#endif
}

/**
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
#ifdef __NetBSD__
struct vmspace *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
#else
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
#endif
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return NULL;

	if (gtt->usertask == NULL)
		return NULL;

#ifdef __NetBSD__
	return gtt->usertask->p_vmspace;
#else
	return gtt->usertask->mm;
#endif
}

/**
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
 * address range for the current task.
 *
 */
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned long size;

	if (gtt == NULL || !gtt->userptr)
		return false;

	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	return true;
}

/**
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
 */
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL || !gtt->userptr)
		return false;

	return true;
}

/**
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

/**
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
 */
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

	if (mem && mem->mem_type == TTM_PL_TT) {
		flags |= AMDGPU_PTE_SYSTEM;

		if (ttm->caching_state == tt_cached)
			flags |= AMDGPU_PTE_SNOOPED;
	}

	return flags;
}

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
 * @adev: amdgpu_device pointer
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
 *
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
				 struct ttm_mem_reg *mem)
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

/**
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
 *
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
 * used to clean out a memory space.
 */
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
	unsigned long num_pages = bo->mem.num_pages;
	struct drm_mm_node *node = bo->mem.mm_node;
	struct dma_resv_list *flist;
	struct dma_fence *f;
	int i;

	if (bo->type == ttm_bo_type_kernel &&
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
		return false;

	/* If bo is a KFD BO, check if the bo belongs to the current process.
1609 * If true, then return false as any KFD process needs all its BOs to 1610 * be resident to run successfully 1611 */ 1612 #ifdef __NetBSD__ /* XXX amdgpu kfd */ 1613 __USE(flist); 1614 __USE(f); 1615 __USE(i); 1616 #else 1617 flist = dma_resv_get_list(bo->base.resv); 1618 if (flist) { 1619 for (i = 0; i < flist->shared_count; ++i) { 1620 f = rcu_dereference_protected(flist->shared[i], 1621 dma_resv_held(bo->base.resv)); 1622 if (amdkfd_fence_check_mm(f, current->mm)) 1623 return false; 1624 } 1625 } 1626 #endif 1627 1628 switch (bo->mem.mem_type) { 1629 case TTM_PL_TT: 1630 return true; 1631 1632 case TTM_PL_VRAM: 1633 /* Check each drm MM node individually */ 1634 while (num_pages) { 1635 if (place->fpfn < (node->start + node->size) && 1636 !(place->lpfn && place->lpfn <= node->start)) 1637 return true; 1638 1639 num_pages -= node->size; 1640 ++node; 1641 } 1642 return false; 1643 1644 default: 1645 break; 1646 } 1647 1648 return ttm_bo_eviction_valuable(bo, place); 1649 } 1650 1651 /** 1652 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. 1653 * 1654 * @bo: The buffer object to read/write 1655 * @offset: Offset into buffer object 1656 * @buf: Secondary buffer to write/read from 1657 * @len: Length in bytes of access 1658 * @write: true if writing 1659 * 1660 * This is used to access VRAM that backs a buffer object via MMIO 1661 * access for debugging purposes. 1662 */ 1663 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, 1664 unsigned long offset, 1665 void *buf, int len, int write) 1666 { 1667 struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); 1668 struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); 1669 struct drm_mm_node *nodes; 1670 uint32_t value = 0; 1671 int ret = 0; 1672 uint64_t pos; 1673 unsigned long flags; 1674 1675 if (bo->mem.mem_type != TTM_PL_VRAM) 1676 return -EIO; 1677 1678 nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); 1679 pos = (nodes->start << PAGE_SHIFT) + offset; 1680 1681 while (len && pos < adev->gmc.mc_vram_size) { 1682 uint64_t aligned_pos = pos & ~(uint64_t)3; 1683 uint32_t bytes = 4 - (pos & 3); 1684 uint32_t shift = (pos & 3) * 8; 1685 uint32_t mask = 0xffffffff << shift; 1686 1687 if (len < bytes) { 1688 mask &= 0xffffffff >> (bytes - len) * 8; 1689 bytes = len; 1690 } 1691 1692 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 1693 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); 1694 WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); 1695 if (!write || mask != 0xffffffff) 1696 value = RREG32_NO_KIQ(mmMM_DATA); 1697 if (write) { 1698 value &= ~mask; 1699 value |= (*(uint32_t *)buf << shift) & mask; 1700 WREG32_NO_KIQ(mmMM_DATA, value); 1701 } 1702 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 1703 if (!write) { 1704 value = (value & mask) >> shift; 1705 memcpy(buf, &value, bytes); 1706 } 1707 1708 ret += bytes; 1709 buf = (uint8_t *)buf + bytes; 1710 pos += bytes; 1711 len -= bytes; 1712 if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { 1713 ++nodes; 1714 pos = (nodes->start << PAGE_SHIFT); 1715 } 1716 } 1717 1718 return ret; 1719 } 1720 1721 static struct ttm_bo_driver amdgpu_bo_driver = { 1722 .ttm_tt_create = &amdgpu_ttm_tt_create, 1723 .ttm_tt_populate = &amdgpu_ttm_tt_populate, 1724 .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, 1725 #ifdef __NetBSD__ 1726 .ttm_tt_swapout = &amdgpu_ttm_tt_swapout, 1727 .ttm_uvm_ops = &amdgpu_uvm_ops, 1728 #endif 1729 .invalidate_caches = &amdgpu_invalidate_caches, 1730 .init_mem_type = &amdgpu_init_mem_type, 1731 .eviction_valuable 
	= amdgpu_ttm_bo_eviction_valuable,
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
	.verify_access = &amdgpu_verify_access,
	.move_notify = &amdgpu_bo_move_notify,
	.release_notify = &amdgpu_bo_release_notify,
	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
	.io_mem_free = &amdgpu_ttm_io_mem_free,
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
	.access_memory = &amdgpu_ttm_access_memory,
	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};

/*
 * Firmware Reservation functions
 */
/**
 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free fw reserved vram if it has been reserved.
 */
static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo,
		NULL, &adev->fw_vram_usage.va);
}

/**
 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from fw.
 */
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
	uint64_t vram_size = adev->gmc.visible_vram_size;

	adev->fw_vram_usage.va = NULL;
	adev->fw_vram_usage.reserved_bo = NULL;

	if (adev->fw_vram_usage.size == 0 ||
	    adev->fw_vram_usage.size > vram_size)
		return 0;

	return amdgpu_bo_create_kernel_at(adev,
					  adev->fw_vram_usage.start_offset,
					  adev->fw_vram_usage.size,
					  AMDGPU_GEM_DOMAIN_VRAM,
					  &adev->fw_vram_usage.reserved_bo,
					  &adev->fw_vram_usage.va);
}

/*
 * Memory training reservation functions
 */

/**
 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free memory training reserved vram if it has been reserved.
 */
static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
{
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
	ctx->c2p_bo = NULL;

	return 0;
}

static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
{
	if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1))
		vram_size -= SZ_1M;

	return ALIGN(vram_size, SZ_1M);
}

/**
 * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from memory training.
 */
static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
{
	int ret;
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

	memset(ctx, 0, sizeof(*ctx));
	if (!adev->fw_vram_usage.mem_train_support) {
		DRM_DEBUG("memory training is not supported!\n");
		return 0;
	}

	ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
	ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
	ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;

	DRM_DEBUG("train_data_size:%"PRIx64",p2c_train_data_offset:%"PRIx64",c2p_train_data_offset:%"PRIx64".\n",
		  ctx->train_data_size,
		  ctx->p2c_train_data_offset,
		  ctx->c2p_train_data_offset);

	ret = amdgpu_bo_create_kernel_at(adev,
					 ctx->c2p_train_data_offset,
					 ctx->train_data_size,
					 AMDGPU_GEM_DOMAIN_VRAM,
					 &ctx->c2p_bo,
					 NULL);
	if (ret) {
		DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
		amdgpu_ttm_training_reserve_vram_fini(adev);
		return ret;
	}

	ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
	return 0;
}

/**
 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
 *
 * This initializes all of the memory space pools that the TTM layer
 * will need such as the GTT space (system memory mapped to the device),
 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
 * can be mapped per VMID.
 */
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
	uint64_t gtt_size;
	int r;
	u64 vis_vram_limit;
	void *stolen_vga_buf;

	mutex_init(&adev->mman.gtt_window_lock);

	/* No other users of this address space, so set it to 0 */
	r = ttm_bo_device_init(&adev->mman.bdev,
			       &amdgpu_bo_driver,
#ifdef __NetBSD__
			       adev->ddev->bst,
			       adev->ddev->dmat,
#else
			       adev->ddev->anon_inode->i_mapping,
#endif
			       adev->ddev->vma_offset_manager,
			       dma_addressing_limited(adev->dev));
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;

	/* We opt to avoid OOM on system pages allocations */
	adev->mman.bdev.no_retry = true;

	/* Initialize VRAM pool with all of VRAM divided into pages */
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
			   adev->gmc.real_vram_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}

	/* Reduce size of CPU-visible VRAM if requested */
	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
	if (amdgpu_vis_vram_limit > 0 &&
	    vis_vram_limit <= adev->gmc.visible_vram_size)
		adev->gmc.visible_vram_size = vis_vram_limit;

	/* Change the size here instead of the init above so only lpfn is affected */
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
#ifdef __NetBSD__
#ifdef _LP64
	if (bus_space_map(adev->gmc.aper_tag, adev->gmc.aper_base,
		adev->gmc.visible_vram_size,
		BUS_SPACE_MAP_LINEAR|BUS_SPACE_MAP_PREFETCHABLE,
		&adev->mman.aper_base_handle)) {
		return -EIO;
	}
	adev->mman.aper_base_kaddr = bus_space_vaddr(adev->gmc.aper_tag,
	    adev->mman.aper_base_handle);
	KASSERT(adev->mman.aper_base_kaddr != NULL);
#endif /* _LP64 */
#else /* __NetBSD__ */
#ifdef CONFIG_64BIT
	adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
						adev->gmc.visible_vram_size);
#endif
#endif

	/*
	 * The reserved vram for firmware must be pinned to the specified
	 * place on the VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
	if (r) {
		return r;
	}

	/*
	 * The reserved vram for memory training must be pinned to the specified
	 * place on the VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_training_reserve_vram_init(adev);
	if (r)
		return r;

	/* allocate memory as required for VGA
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver. */
	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM,
				    &adev->stolen_vga_memory,
				    NULL, &stolen_vga_buf);
	if (r)
		return r;

	/*
	 * reserve one TMR (64K) memory at the top of VRAM which holds
	 * IP Discovery data and is protected by PSP.
	 */
	r = amdgpu_bo_create_kernel_at(adev,
				       adev->gmc.real_vram_size - DISCOVERY_TMR_SIZE,
				       DISCOVERY_TMR_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->discovery_memory,
				       NULL);
	if (r)
		return r;

	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));

	/* Compute GTT size, either based on 3/4 of the size of RAM
	 * or whatever the user passed on module init */
	if (amdgpu_gtt_size == -1) {
		struct sysinfo si;

		si_meminfo(&si);
		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
				   adev->gmc.mc_vram_size),
			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	}
	else
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;

	/* Initialize GTT memory pool */
	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(gtt_size / (1024 * 1024)));

	/* Initialize various on-chip memory pools */
	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
			   adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
			   adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
			   adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}

	/* Register debugfs entries for amdgpu_ttm */
	r = amdgpu_ttm_debugfs_init(adev);
	if (r) {
		DRM_ERROR("Failed to init debugfs\n");
		return r;
	}
	return 0;
}

/**
 * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
 */
void amdgpu_ttm_late_init(struct amdgpu_device *adev)
{
	void *stolen_vga_buf;
	/* return the VGA stolen memory (if any) back to VRAM */
	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);
}

/**
 * amdgpu_ttm_fini - De-initialize the TTM memory pools
 */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	if (!adev->mman.initialized)
		return;

	amdgpu_ttm_debugfs_fini(adev);
	amdgpu_ttm_training_reserve_vram_fini(adev);
	/*
return the IP Discovery TMR memory back to VRAM */ 2051 amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); 2052 amdgpu_ttm_fw_reserve_vram_fini(adev); 2053 2054 #ifdef __NetBSD__ 2055 if (adev->mman.aper_base_handle) { 2056 bus_space_unmap(adev->gmc.aper_tag, 2057 adev->mman.aper_base_handle, adev->gmc.visible_vram_size); 2058 } 2059 #else 2060 if (adev->mman.aper_base_kaddr) 2061 iounmap(adev->mman.aper_base_kaddr); 2062 #endif 2063 adev->mman.aper_base_kaddr = NULL; 2064 2065 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); 2066 ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); 2067 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); 2068 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); 2069 ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); 2070 ttm_bo_device_release(&adev->mman.bdev); 2071 adev->mman.initialized = false; 2072 mutex_destroy(&adev->mman.gtt_window_lock); 2073 DRM_INFO("amdgpu: ttm finalized\n"); 2074 } 2075 2076 /** 2077 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions 2078 * 2079 * @adev: amdgpu_device pointer 2080 * @enable: true when we can use buffer functions. 2081 * 2082 * Enable/disable use of buffer functions during suspend/resume. This should 2083 * only be called at bootup or when userspace isn't running. 2084 */ 2085 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) 2086 { 2087 struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; 2088 uint64_t size; 2089 int r; 2090 2091 if (!adev->mman.initialized || adev->in_gpu_reset || 2092 adev->mman.buffer_funcs_enabled == enable) 2093 return; 2094 2095 if (enable) { 2096 struct amdgpu_ring *ring; 2097 struct drm_gpu_scheduler *sched; 2098 2099 ring = adev->mman.buffer_funcs_ring; 2100 sched = &ring->sched; 2101 r = drm_sched_entity_init(&adev->mman.entity, 2102 DRM_SCHED_PRIORITY_KERNEL, &sched, 2103 1, NULL); 2104 if (r) { 2105 DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", 2106 r); 2107 return; 2108 } 2109 } else { 2110 drm_sched_entity_destroy(&adev->mman.entity); 2111 dma_fence_put(man->move); 2112 man->move = NULL; 2113 } 2114 2115 /* this just adjusts TTM's idea of the size, which sets lpfn to the correct value */ 2116 if (enable) 2117 size = adev->gmc.real_vram_size; 2118 else 2119 size = adev->gmc.visible_vram_size; 2120 man->size = size >> PAGE_SHIFT; 2121 adev->mman.buffer_funcs_enabled = enable; 2122 } 2123 2124 #ifdef __NetBSD__ 2125 2126 int 2127 amdgpu_mmap_object(struct drm_device *dev, off_t offset, size_t size, 2128 vm_prot_t prot, struct uvm_object **uobjp, voff_t *uoffsetp, 2129 struct file *file) 2130 { 2131 struct amdgpu_device *adev = dev->dev_private; 2132 2133 KASSERT(0 == (offset & (PAGE_SIZE - 1))); 2134 2135 if (__predict_false(adev == NULL)) /* XXX How?? 
*/ 2136 return -EINVAL; 2137 2138 return ttm_bo_mmap_object(&adev->mman.bdev, offset, size, prot, 2139 uobjp, uoffsetp, file); 2140 } 2141 2142 #else /* __NetBSD__ */ 2143 2144 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) 2145 { 2146 struct drm_file *file_priv = filp->private_data; 2147 struct amdgpu_device *adev = file_priv->minor->dev->dev_private; 2148 2149 if (adev == NULL) 2150 return -EINVAL; 2151 2152 return ttm_bo_mmap(filp, vma, &adev->mman.bdev); 2153 } 2154 2155 #endif /* __NetBSD__ */ 2156 static int amdgpu_map_buffer(struct ttm_buffer_object *bo, 2157 struct ttm_mem_reg *mem, unsigned num_pages, 2158 uint64_t offset, unsigned window, 2159 struct amdgpu_ring *ring, 2160 uint64_t *addr) 2161 { 2162 struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; 2163 struct amdgpu_device *adev = ring->adev; 2164 struct ttm_tt *ttm = bo->ttm; 2165 struct amdgpu_job *job; 2166 unsigned num_dw, num_bytes; 2167 dma_addr_t *dma_address; 2168 struct dma_fence *fence; 2169 uint64_t src_addr, dst_addr; 2170 uint64_t flags; 2171 int r; 2172 2173 BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < 2174 AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); 2175 2176 *addr = adev->gmc.gart_start; 2177 *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 2178 AMDGPU_GPU_PAGE_SIZE; 2179 2180 num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); 2181 num_bytes = num_pages * 8; 2182 2183 r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); 2184 if (r) 2185 return r; 2186 2187 src_addr = num_dw * 4; 2188 src_addr += job->ibs[0].gpu_addr; 2189 2190 dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); 2191 dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; 2192 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, 2193 dst_addr, num_bytes); 2194 2195 amdgpu_ring_pad_ib(ring, &job->ibs[0]); 2196 WARN_ON(job->ibs[0].length_dw > num_dw); 2197 2198 #ifdef __NetBSD__ 2199 __USE(dma_address); 2200 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); 2201 r = amdgpu_gart_map(adev, 0, num_pages, offset, gtt->ttm.dma_address, 2202 flags, &job->ibs[0].ptr[num_dw]); 2203 #else 2204 dma_address = &gtt->ttm.dma_address[offset >> PAGE_SHIFT]; 2205 flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); 2206 r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, 2207 &job->ibs[0].ptr[num_dw]); 2208 #endif 2209 if (r) 2210 goto error_free; 2211 2212 r = amdgpu_job_submit(job, &adev->mman.entity, 2213 AMDGPU_FENCE_OWNER_UNDEFINED, &fence); 2214 if (r) 2215 goto error_free; 2216 2217 dma_fence_put(fence); 2218 2219 return r; 2220 2221 error_free: 2222 amdgpu_job_free(job); 2223 return r; 2224 } 2225 2226 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, 2227 uint64_t dst_offset, uint32_t byte_count, 2228 struct dma_resv *resv, 2229 struct dma_fence **fence, bool direct_submit, 2230 bool vm_needs_flush) 2231 { 2232 struct amdgpu_device *adev = ring->adev; 2233 struct amdgpu_job *job; 2234 2235 uint32_t max_bytes; 2236 unsigned num_loops, num_dw; 2237 unsigned i; 2238 int r; 2239 2240 if (direct_submit && !ring->sched.ready) { 2241 DRM_ERROR("Trying to move memory with ring turned off.\n"); 2242 return -EINVAL; 2243 } 2244 2245 max_bytes = adev->mman.buffer_funcs->copy_max_bytes; 2246 num_loops = DIV_ROUND_UP(byte_count, max_bytes); 2247 num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); 2248 2249 r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); 2250 if (r) 2251 return r; 2252 2253 if (vm_needs_flush) { 2254 job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); 2255 job->vm_needs_flush = 
true; 2256 } 2257 if (resv) { 2258 r = amdgpu_sync_resv(adev, &job->sync, resv, 2259 AMDGPU_FENCE_OWNER_UNDEFINED, 2260 false); 2261 if (r) { 2262 DRM_ERROR("sync failed (%d).\n", r); 2263 goto error_free; 2264 } 2265 } 2266 2267 for (i = 0; i < num_loops; i++) { 2268 uint32_t cur_size_in_bytes = min(byte_count, max_bytes); 2269 2270 amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, 2271 dst_offset, cur_size_in_bytes); 2272 2273 src_offset += cur_size_in_bytes; 2274 dst_offset += cur_size_in_bytes; 2275 byte_count -= cur_size_in_bytes; 2276 } 2277 2278 amdgpu_ring_pad_ib(ring, &job->ibs[0]); 2279 WARN_ON(job->ibs[0].length_dw > num_dw); 2280 if (direct_submit) 2281 r = amdgpu_job_submit_direct(job, ring, fence); 2282 else 2283 r = amdgpu_job_submit(job, &adev->mman.entity, 2284 AMDGPU_FENCE_OWNER_UNDEFINED, fence); 2285 if (r) 2286 goto error_free; 2287 2288 return r; 2289 2290 error_free: 2291 amdgpu_job_free(job); 2292 DRM_ERROR("Error scheduling IBs (%d)\n", r); 2293 return r; 2294 } 2295 2296 int amdgpu_fill_buffer(struct amdgpu_bo *bo, 2297 uint32_t src_data, 2298 struct dma_resv *resv, 2299 struct dma_fence **fence) 2300 { 2301 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 2302 uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; 2303 struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; 2304 2305 struct drm_mm_node *mm_node; 2306 unsigned long num_pages; 2307 unsigned int num_loops, num_dw; 2308 2309 struct amdgpu_job *job; 2310 int r; 2311 2312 if (!adev->mman.buffer_funcs_enabled) { 2313 DRM_ERROR("Trying to clear memory with ring turned off.\n"); 2314 return -EINVAL; 2315 } 2316 2317 if (bo->tbo.mem.mem_type == TTM_PL_TT) { 2318 r = amdgpu_ttm_alloc_gart(&bo->tbo); 2319 if (r) 2320 return r; 2321 } 2322 2323 num_pages = bo->tbo.num_pages; 2324 mm_node = bo->tbo.mem.mm_node; 2325 num_loops = 0; 2326 while (num_pages) { 2327 uint64_t byte_count = mm_node->size << PAGE_SHIFT; 2328 2329 num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes); 2330 num_pages -= mm_node->size; 2331 ++mm_node; 2332 } 2333 num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; 2334 2335 /* for IB padding */ 2336 num_dw += 64; 2337 2338 r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); 2339 if (r) 2340 return r; 2341 2342 if (resv) { 2343 r = amdgpu_sync_resv(adev, &job->sync, resv, 2344 AMDGPU_FENCE_OWNER_UNDEFINED, false); 2345 if (r) { 2346 DRM_ERROR("sync failed (%d).\n", r); 2347 goto error_free; 2348 } 2349 } 2350 2351 num_pages = bo->tbo.num_pages; 2352 mm_node = bo->tbo.mem.mm_node; 2353 2354 while (num_pages) { 2355 uint64_t byte_count = mm_node->size << PAGE_SHIFT; 2356 uint64_t dst_addr; 2357 2358 dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); 2359 while (byte_count) { 2360 uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count, 2361 max_bytes); 2362 2363 amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, 2364 dst_addr, cur_size_in_bytes); 2365 2366 dst_addr += cur_size_in_bytes; 2367 byte_count -= cur_size_in_bytes; 2368 } 2369 2370 num_pages -= mm_node->size; 2371 ++mm_node; 2372 } 2373 2374 amdgpu_ring_pad_ib(ring, &job->ibs[0]); 2375 WARN_ON(job->ibs[0].length_dw > num_dw); 2376 r = amdgpu_job_submit(job, &adev->mman.entity, 2377 AMDGPU_FENCE_OWNER_UNDEFINED, fence); 2378 if (r) 2379 goto error_free; 2380 2381 return 0; 2382 2383 error_free: 2384 amdgpu_job_free(job); 2385 return r; 2386 } 2387 2388 #if defined(CONFIG_DEBUG_FS) 2389 2390 static int amdgpu_mm_dump_table(struct seq_file *m, void *data) 2391 { 2392 struct drm_info_node 
*node = (struct drm_info_node *)m->private; 2393 unsigned ttm_pl = (uintptr_t)node->info_ent->data; 2394 struct drm_device *dev = node->minor->dev; 2395 struct amdgpu_device *adev = dev->dev_private; 2396 struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; 2397 struct drm_printer p = drm_seq_file_printer(m); 2398 2399 man->func->debug(man, &p); 2400 return 0; 2401 } 2402 2403 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { 2404 {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM}, 2405 {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT}, 2406 {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS}, 2407 {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS}, 2408 {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA}, 2409 {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, 2410 #ifdef CONFIG_SWIOTLB 2411 {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} 2412 #endif 2413 }; 2414 2415 /** 2416 * amdgpu_ttm_vram_read - Linear read access to VRAM 2417 * 2418 * Accesses VRAM via MMIO for debugging purposes. 2419 */ 2420 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, 2421 size_t size, loff_t *pos) 2422 { 2423 struct amdgpu_device *adev = file_inode(f)->i_private; 2424 ssize_t result = 0; 2425 int r; 2426 2427 if (size & 0x3 || *pos & 0x3) 2428 return -EINVAL; 2429 2430 if (*pos >= adev->gmc.mc_vram_size) 2431 return -ENXIO; 2432 2433 while (size) { 2434 unsigned long flags; 2435 uint32_t value; 2436 2437 if (*pos >= adev->gmc.mc_vram_size) 2438 return result; 2439 2440 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 2441 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); 2442 WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); 2443 value = RREG32_NO_KIQ(mmMM_DATA); 2444 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 2445 2446 r = put_user(value, (uint32_t *)buf); 2447 if (r) 2448 return r; 2449 2450 result += 4; 2451 buf += 4; 2452 *pos += 4; 2453 size -= 4; 2454 } 2455 2456 return result; 2457 } 2458 2459 /** 2460 * amdgpu_ttm_vram_write - Linear write access to VRAM 2461 * 2462 * Accesses VRAM via MMIO for debugging purposes. 
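 *
 * Illustrative usage sketch (added for this listing, not part of the
 * driver): like amdgpu_ttm_vram_read() above, this handler rejects any
 * access whose size or offset is not 4-byte aligned (-EINVAL), so a
 * userspace probe pokes whole dwords through the debugfs file, roughly:
 *
 *	uint32_t v;
 *	int fd = open("/sys/kernel/debug/dri/0/amdgpu_vram", O_RDWR);
 *	pread(fd, &v, sizeof(v), 0x1000);	// read dword at VRAM offset 0x1000
 *	pwrite(fd, &v, sizeof(v), 0x1000);	// write it back
 *
 * The path assumes debugfs at its usual mount point and DRM minor 0; the
 * size reported for the amdgpu_vram file is set from gmc.mc_vram_size in
 * amdgpu_ttm_debugfs_init() below.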
2463 */ 2464 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, 2465 size_t size, loff_t *pos) 2466 { 2467 struct amdgpu_device *adev = file_inode(f)->i_private; 2468 ssize_t result = 0; 2469 int r; 2470 2471 if (size & 0x3 || *pos & 0x3) 2472 return -EINVAL; 2473 2474 if (*pos >= adev->gmc.mc_vram_size) 2475 return -ENXIO; 2476 2477 while (size) { 2478 unsigned long flags; 2479 uint32_t value; 2480 2481 if (*pos >= adev->gmc.mc_vram_size) 2482 return result; 2483 2484 r = get_user(value, (uint32_t *)buf); 2485 if (r) 2486 return r; 2487 2488 spin_lock_irqsave(&adev->mmio_idx_lock, flags); 2489 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000); 2490 WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31); 2491 WREG32_NO_KIQ(mmMM_DATA, value); 2492 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); 2493 2494 result += 4; 2495 buf += 4; 2496 *pos += 4; 2497 size -= 4; 2498 } 2499 2500 return result; 2501 } 2502 2503 static const struct file_operations amdgpu_ttm_vram_fops = { 2504 .owner = THIS_MODULE, 2505 .read = amdgpu_ttm_vram_read, 2506 .write = amdgpu_ttm_vram_write, 2507 .llseek = default_llseek, 2508 }; 2509 2510 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 2511 2512 /** 2513 * amdgpu_ttm_gtt_read - Linear read access to GTT memory 2514 */ 2515 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, 2516 size_t size, loff_t *pos) 2517 { 2518 struct amdgpu_device *adev = file_inode(f)->i_private; 2519 ssize_t result = 0; 2520 int r; 2521 2522 while (size) { 2523 loff_t p = *pos / PAGE_SIZE; 2524 unsigned off = *pos & ~PAGE_MASK; 2525 size_t cur_size = min_t(size_t, size, PAGE_SIZE - off); 2526 struct page *page; 2527 void *ptr; 2528 2529 if (p >= adev->gart.num_cpu_pages) 2530 return result; 2531 2532 page = adev->gart.pages[p]; 2533 if (page) { 2534 ptr = kmap(page); 2535 ptr += off; 2536 2537 r = copy_to_user(buf, ptr, cur_size); 2538 kunmap(adev->gart.pages[p]); 2539 } else 2540 r = clear_user(buf, cur_size); 2541 2542 if (r) 2543 return -EFAULT; 2544 2545 result += cur_size; 2546 buf += cur_size; 2547 *pos += cur_size; 2548 size -= cur_size; 2549 } 2550 2551 return result; 2552 } 2553 2554 static const struct file_operations amdgpu_ttm_gtt_fops = { 2555 .owner = THIS_MODULE, 2556 .read = amdgpu_ttm_gtt_read, 2557 .llseek = default_llseek 2558 }; 2559 2560 #endif 2561 2562 /** 2563 * amdgpu_iomem_read - Virtual read access to GPU mapped memory 2564 * 2565 * This function is used to read memory that has been mapped to the 2566 * GPU and the known addresses are not physical addresses but instead 2567 * bus addresses (e.g., what you'd put in an IB or ring buffer). 2568 */ 2569 static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, 2570 size_t size, loff_t *pos) 2571 { 2572 struct amdgpu_device *adev = file_inode(f)->i_private; 2573 struct iommu_domain *dom; 2574 ssize_t result = 0; 2575 int r; 2576 2577 /* retrieve the IOMMU domain if any for this device */ 2578 dom = iommu_get_domain_for_dev(adev->dev); 2579 2580 while (size) { 2581 phys_addr_t addr = *pos & PAGE_MASK; 2582 loff_t off = *pos & ~PAGE_MASK; 2583 size_t bytes = PAGE_SIZE - off; 2584 unsigned long pfn; 2585 struct page *p; 2586 void *ptr; 2587 2588 bytes = bytes < size ? bytes : size; 2589 2590 /* Translate the bus address to a physical address. If 2591 * the domain is NULL it means there is no IOMMU active 2592 * and the address translation is the identity 2593 */ 2594 addr = dom ? 
iommu_iova_to_phys(dom, addr) : addr; 2595 2596 pfn = addr >> PAGE_SHIFT; 2597 if (!pfn_valid(pfn)) 2598 return -EPERM; 2599 2600 p = pfn_to_page(pfn); 2601 if (p->mapping != adev->mman.bdev.dev_mapping) 2602 return -EPERM; 2603 2604 ptr = kmap(p); 2605 r = copy_to_user(buf, ptr + off, bytes); 2606 kunmap(p); 2607 if (r) 2608 return -EFAULT; 2609 2610 size -= bytes; 2611 *pos += bytes; 2612 result += bytes; 2613 } 2614 2615 return result; 2616 } 2617 2618 /** 2619 * amdgpu_iomem_write - Virtual write access to GPU mapped memory 2620 * 2621 * This function is used to write memory that has been mapped to the 2622 * GPU and the known addresses are not physical addresses but instead 2623 * bus addresses (e.g., what you'd put in an IB or ring buffer). 2624 */ 2625 static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, 2626 size_t size, loff_t *pos) 2627 { 2628 struct amdgpu_device *adev = file_inode(f)->i_private; 2629 struct iommu_domain *dom; 2630 ssize_t result = 0; 2631 int r; 2632 2633 dom = iommu_get_domain_for_dev(adev->dev); 2634 2635 while (size) { 2636 phys_addr_t addr = *pos & PAGE_MASK; 2637 loff_t off = *pos & ~PAGE_MASK; 2638 size_t bytes = PAGE_SIZE - off; 2639 unsigned long pfn; 2640 struct page *p; 2641 void *ptr; 2642 2643 bytes = bytes < size ? bytes : size; 2644 2645 addr = dom ? iommu_iova_to_phys(dom, addr) : addr; 2646 2647 pfn = addr >> PAGE_SHIFT; 2648 if (!pfn_valid(pfn)) 2649 return -EPERM; 2650 2651 p = pfn_to_page(pfn); 2652 if (p->mapping != adev->mman.bdev.dev_mapping) 2653 return -EPERM; 2654 2655 ptr = kmap(p); 2656 r = copy_from_user(ptr + off, buf, bytes); 2657 kunmap(p); 2658 if (r) 2659 return -EFAULT; 2660 2661 size -= bytes; 2662 *pos += bytes; 2663 result += bytes; 2664 } 2665 2666 return result; 2667 } 2668 2669 static const struct file_operations amdgpu_ttm_iomem_fops = { 2670 .owner = THIS_MODULE, 2671 .read = amdgpu_iomem_read, 2672 .write = amdgpu_iomem_write, 2673 .llseek = default_llseek 2674 }; 2675 2676 static const struct { 2677 char *name; 2678 const struct file_operations *fops; 2679 int domain; 2680 } ttm_debugfs_entries[] = { 2681 { "amdgpu_vram", &amdgpu_ttm_vram_fops, TTM_PL_VRAM }, 2682 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS 2683 { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, 2684 #endif 2685 { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM }, 2686 }; 2687 2688 #endif 2689 2690 static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) 2691 { 2692 #if defined(CONFIG_DEBUG_FS) 2693 unsigned count; 2694 2695 struct drm_minor *minor = adev->ddev->primary; 2696 struct dentry *ent, *root = minor->debugfs_root; 2697 2698 for (count = 0; count < ARRAY_SIZE(ttm_debugfs_entries); count++) { 2699 ent = debugfs_create_file( 2700 ttm_debugfs_entries[count].name, 2701 S_IFREG | S_IRUGO, root, 2702 adev, 2703 ttm_debugfs_entries[count].fops); 2704 if (IS_ERR(ent)) 2705 return PTR_ERR(ent); 2706 if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) 2707 i_size_write(ent->d_inode, adev->gmc.mc_vram_size); 2708 else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) 2709 i_size_write(ent->d_inode, adev->gmc.gart_size); 2710 adev->mman.debugfs_entries[count] = ent; 2711 } 2712 2713 count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); 2714 2715 #ifdef CONFIG_SWIOTLB 2716 if (!(adev->need_swiotlb && swiotlb_nr_tbl())) 2717 --count; 2718 #endif 2719 2720 return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); 2721 #else 2722 return 0; 2723 #endif 2724 } 2725 2726 static void amdgpu_ttm_debugfs_fini(struct amdgpu_device 
*adev) 2727 { 2728 #if defined(CONFIG_DEBUG_FS) 2729 unsigned i; 2730 2731 for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++) 2732 debugfs_remove(adev->mman.debugfs_entries[i]); 2733 #endif 2734 } 2735
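/*
 * Illustrative sketch (kept under #if 0 so it is never built): a minimal
 * example of how a caller might drive amdgpu_copy_buffer() above once the
 * buffer functions have been enabled via amdgpu_ttm_set_buffer_funcs_status().
 * The helper name and the GPU addresses are hypothetical; real callers
 * derive offsets from pinned BOs, e.g. via amdgpu_bo_gpu_offset().
 */
#if 0
static int amdgpu_ttm_copy_example(struct amdgpu_device *adev,
				   uint64_t src_gpu_addr,
				   uint64_t dst_gpu_addr,
				   uint32_t byte_count)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence = NULL;
	int r;

	/* Queue the SDMA copy through the shared TTM move entity. */
	r = amdgpu_copy_buffer(ring, src_gpu_addr, dst_gpu_addr, byte_count,
			       NULL, &fence, false, false);
	if (r)
		return r;

	/*
	 * Wait for the copy to retire, then drop the fence reference.
	 * dma_fence_wait() returns 0 on success or a negative errno.
	 */
	r = dma_fence_wait(fence, false);
	dma_fence_put(fence);
	return r;
}
#endif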