/*	$NetBSD: amdgpu_cs.c,v 1.4 2020/02/14 04:38:23 riastradh Exp $	*/

/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_cs.c,v 1.4 2020/02/14 04:38:23 riastradh Exp $");

#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

#define AMDGPU_CS_MAX_PRIORITY		32u
#define AMDGPU_CS_NUM_BUCKETS		(AMDGPU_CS_MAX_PRIORITY + 1)

/* This is based on bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct amdgpu_cs_buckets {
	struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
};

static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
}

static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

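/*
 * Translate a userspace (ip_type, ip_instance, ring) triple into the
 * corresponding ring object.  Returns 0 and stores the ring in *out_ring,
 * or -EINVAL if the triple does not name an initialized ring.
 */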
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < adev->sdma.num_instances) {
			*out_ring = &adev->sdma.instance[ring].ring;
		} else {
			DRM_ERROR("only %d SDMA rings are supported\n",
				  adev->sdma.num_instances);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}

	if (!(*out_ring && (*out_ring)->adev)) {
		DRM_ERROR("Ring %d is not initialized on IP %d\n",
			  ring, ip_type);
		return -EINVAL;
	}

	return 0;
}

static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct drm_amdgpu_cs_chunk_fence *fence_data)
{
	struct drm_gem_object *gobj;
	uint32_t handle __unused;

	handle = fence_data->handle;
	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
				     fence_data->handle);
	if (gobj == NULL)
		return -EINVAL;

	p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	p->uf.offset = fence_data->offset;

	if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) {
		drm_gem_object_unreference_unlocked(gobj);
		return -EINVAL;
	}

	p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo);
	p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
	p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;

	drm_gem_object_unreference_unlocked(gobj);
	return 0;
}

int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned size;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);

	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_bo_list;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
				  GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_bo_list;
	}

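	/* Copy in each chunk descriptor and its data from userspace and
	 * classify it by chunk id.
	 */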
	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			p->num_ibs++;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
	if (!p->ibs) {
		ret = -ENOMEM;
		goto free_all_kdata;
	}

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_bo_list:
	if (p->bo_list)
		amdgpu_bo_list_put(p->bo_list);
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *          __________________
	 * 1/4 of -|\                |
	 * VRAM    | \               |
	 *         |  \              |
	 *         |   \             |
	 *         |    \            |
	 *         |     \           |
	 *         |      \          |
	 *         |       \_________|1 MB
	 *         |-----------------|
	 *    VRAM 0 %            100 %
	 *         used           used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

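/*
 * Validate (place) every BO on the list, trying each buffer's preferred
 * domain first, but leaving already-placed buffers where they are once the
 * per-IB byte-move threshold has been exceeded.
 */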
static
int amdgpu_cs_list_validate(struct amdgpu_device *adev,
			    struct amdgpu_vm *vm,
			    struct list_head *validated)
{
	struct amdgpu_bo_list_entry *lobj;
	struct amdgpu_bo *bo;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 current_domain =
				amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((lobj->allowed_domains & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			amdgpu_ttm_placement_from_domain(bo, domain);
			initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&adev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS && domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				return r;
			}
		}
		lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
	}
	return 0;
}

static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_cs_buckets buckets;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	int i, r;

	if (p->bo_list) {
		need_mmap_lock = p->bo_list->has_userptr;
		amdgpu_cs_buckets_init(&buckets);
		for (i = 0; i < p->bo_list->num_entries; i++)
			amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
					      p->bo_list->array[i].priority);

		amdgpu_cs_buckets_get_list(&buckets, &p->validated);
	}

	p->vm_bos = amdgpu_vm_get_bos(p->adev, &fpriv->vm,
				      &p->validated);

	if (p->uf.bo)
		list_add(&p->uf_entry.tv.head, &p->validated);

#ifdef __NetBSD__
	if (need_mmap_lock)
		vm_map_lock_read(&curproc->p_vmspace->vm_map);
#else
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);
#endif

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
	if (unlikely(r != 0))
		goto error_reserve;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);

error_validate:
	if (r)
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

error_reserve:
#ifdef __NetBSD__
	if (need_mmap_lock)
		vm_map_unlock_read(&curproc->p_vmspace->vm_map);
#else
	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);
#endif

	return r;
}

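/*
 * Make the first IB of this submission wait for the fences already attached
 * to every validated buffer's reservation object.
 */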
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);

		if (r)
			return r;
	}
	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 * @backoff:	whether the buffer reservations still need to be backed off
 *
 * If error is set, the validated buffers are backed off; otherwise just free
 * the memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->ibs)
		for (i = 0; i < parser->num_ibs; i++)
			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
	kfree(parser->ibs);
	amdgpu_bo_unref(&parser->uf.bo);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}

static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
			if (r)
				return r;
		}
	}

	r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo);
		}
	}

	return r;
}

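/*
 * Run the per-ring command stream parsers (only used for UVD/VCE VM
 * emulation), then update the page tables and sync the rings.
 */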
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	int i, r;

	if (parser->num_ibs == 0)
		return 0;

	/* Only for UVD/VCE VM emulation */
	for (i = 0; i < parser->num_ibs; i++) {
		ring = parser->ibs[i].ring;
		if (ring->funcs->parse_cs) {
			r = amdgpu_ring_parse_cs(ring, parser, i);
			if (r)
				return r;
		}
	}

	r = amdgpu_bo_vm_update_pte(parser, vm);
	if (!r)
		amdgpu_cs_sync_rings(parser);

	return r;
}

static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(ring, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	if (!parser->num_ibs)
		return 0;

	/* add GDS resources to first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}
	/* wrap the last IB with user fence */
	if (parser->uf.bo) {
		struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
		    ib->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->uf;
	}

	return 0;
}

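/*
 * Resolve the DEPENDENCIES chunks: look up each referenced fence and add it
 * to the first IB's sync object so the submission waits for it.
 */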
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_ib *ib;
	int i, j, r;

	if (!p->num_ibs)
		return 0;

	/* Add dependencies to first IB */
	ib = &p->ibs[0];
	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				r = amdgpu_sync_fence(adev, &ib->sync, fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

static int amdgpu_cs_free_job(struct amdgpu_job *job)
{
	int i;
	if (job->ibs)
		for (i = 0; i < job->num_ibs; i++)
			amdgpu_ib_free(job->adev, &job->ibs[i]);
	kfree(job->ibs);
	if (job->uf.bo)
		amdgpu_bo_unref(&job->uf.bo);
	return 0;
}

int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		amdgpu_cs_parser_fini(&parser, r, false);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}
	r = amdgpu_cs_parser_relocs(&parser);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, &parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, &parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser.num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

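	/* Hand the IBs to the GPU scheduler if it is enabled; otherwise
	 * submit them to the ring directly.
	 */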
	if (amdgpu_enable_scheduler && parser.num_ibs) {
		struct amdgpu_ring *ring = parser.ibs->ring;
		struct amd_sched_fence *fence;
		struct amdgpu_job *job;

		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
		if (!job) {
			r = -ENOMEM;
			goto out;
		}

		job->base.sched = &ring->sched;
		job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
		job->adev = parser.adev;
		job->owner = parser.filp;
		job->free_job = amdgpu_cs_free_job;

		job->ibs = parser.ibs;
		job->num_ibs = parser.num_ibs;
		parser.ibs = NULL;
		parser.num_ibs = 0;

		if (job->ibs[job->num_ibs - 1].user) {
			job->uf = parser.uf;
			job->ibs[job->num_ibs - 1].user = &job->uf;
			parser.uf.bo = NULL;
		}

		fence = amd_sched_fence_create(job->base.s_entity,
					       parser.filp);
		if (!fence) {
			r = -ENOMEM;
			amdgpu_cs_free_job(job);
			kfree(job);
			goto out;
		}
		job->base.s_fence = fence;
		parser.fence = fence_get(&fence->base);

		cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
						      &fence->base);
		job->ibs[job->num_ibs - 1].sequence = cs->out.handle;

		trace_amdgpu_cs_ioctl(job);
		amd_sched_entity_push_job(&job->base);

	} else {
		struct amdgpu_fence *fence;

		r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
				       parser.filp);
		fence = parser.ibs[parser.num_ibs - 1].fence;
		parser.fence = fence_get(&fence->base);
		cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
	}

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	r = amdgpu_cs_handle_lockup(adev, r);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find a bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_list_entry *reloc;
	struct amdgpu_bo_va_mapping *mapping;

	addr /= AMDGPU_GPU_PAGE_SIZE;

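	/* Check the mappings on each bo_va's "valids" list first, then fall
	 * back to its "invalids" list.
	 */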
	list_for_each_entry(reloc, &parser->validated, tv.head) {
		if (!reloc->bo_va)
			continue;

		list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}

		list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = reloc->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}