1 /* $NetBSD: radeon_cs.c,v 1.5 2020/02/14 04:38:24 riastradh Exp $ */ 2 3 /* 4 * Copyright 2008 Jerome Glisse. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 * 26 * Authors: 27 * Jerome Glisse <glisse@freedesktop.org> 28 */ 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: radeon_cs.c,v 1.5 2020/02/14 04:38:24 riastradh Exp $"); 31 32 #include <linux/list_sort.h> 33 #include <drm/drmP.h> 34 #include <drm/radeon_drm.h> 35 #include "radeon_reg.h" 36 #include "radeon.h" 37 #include "radeon_trace.h" 38 39 #define RADEON_CS_MAX_PRIORITY 32u 40 #define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) 41 42 /* This is based on the bucket sort with O(n) time complexity. 43 * An item with priority "i" is added to bucket[i]. The lists are then 44 * concatenated in descending order. 45 */ 46 struct radeon_cs_buckets { 47 struct list_head bucket[RADEON_CS_NUM_BUCKETS]; 48 }; 49 50 static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) 51 { 52 unsigned i; 53 54 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) 55 INIT_LIST_HEAD(&b->bucket[i]); 56 } 57 58 static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, 59 struct list_head *item, unsigned priority) 60 { 61 /* Since buffers which appear sooner in the relocation list are 62 * likely to be used more often than buffers which appear later 63 * in the list, the sort mustn't change the ordering of buffers 64 * with the same priority, i.e. it must be stable. 65 */ 66 list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); 67 } 68 69 static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, 70 struct list_head *out_list) 71 { 72 unsigned i; 73 74 /* Connect the sorted buckets in the output list. */ 75 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { 76 list_splice(&b->bucket[i], out_list); 77 } 78 } 79 80 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) 81 { 82 struct drm_device *ddev = p->rdev->ddev; 83 struct radeon_cs_chunk *chunk; 84 struct radeon_cs_buckets buckets; 85 unsigned i; 86 bool need_mmap_lock = false; 87 int r; 88 89 if (p->chunk_relocs == NULL) { 90 return 0; 91 } 92 chunk = p->chunk_relocs; 93 p->dma_reloc_idx = 0; 94 /* FIXME: we assume that each relocs use 4 dwords */ 95 p->nrelocs = chunk->length_dw / 4; 96 p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list)); 97 if (p->relocs == NULL) { 98 return -ENOMEM; 99 } 100 101 radeon_cs_buckets_init(&buckets); 102 103 for (i = 0; i < p->nrelocs; i++) { 104 struct drm_radeon_cs_reloc *r; 105 struct drm_gem_object *gobj; 106 unsigned priority; 107 108 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; 109 gobj = drm_gem_object_lookup(ddev, p->filp, r->handle); 110 if (gobj == NULL) { 111 DRM_ERROR("gem object lookup failed 0x%x\n", 112 r->handle); 113 return -ENOENT; 114 } 115 p->relocs[i].robj = gem_to_radeon_bo(gobj); 116 117 /* The userspace buffer priorities are from 0 to 15. A higher 118 * number means the buffer is more important. 119 * Also, the buffers used for write have a higher priority than 120 * the buffers used for read only, which doubles the range 121 * to 0 to 31. 32 is reserved for the kernel driver. 122 */ 123 priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2 124 + !!r->write_domain; 125 126 /* the first reloc of an UVD job is the msg and that must be in 127 VRAM, also but everything into VRAM on AGP cards and older 128 IGP chips to avoid image corruptions */ 129 if (p->ring == R600_RING_TYPE_UVD_INDEX && 130 (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) || 131 p->rdev->family == CHIP_RS780 || 132 p->rdev->family == CHIP_RS880)) { 133 134 /* TODO: is this still needed for NI+ ? */ 135 p->relocs[i].prefered_domains = 136 RADEON_GEM_DOMAIN_VRAM; 137 138 p->relocs[i].allowed_domains = 139 RADEON_GEM_DOMAIN_VRAM; 140 141 /* prioritize this over any other relocation */ 142 priority = RADEON_CS_MAX_PRIORITY; 143 } else { 144 uint32_t domain = r->write_domain ? 145 r->write_domain : r->read_domains; 146 147 if (domain & RADEON_GEM_DOMAIN_CPU) { 148 DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid " 149 "for command submission\n"); 150 return -EINVAL; 151 } 152 153 p->relocs[i].prefered_domains = domain; 154 if (domain == RADEON_GEM_DOMAIN_VRAM) 155 domain |= RADEON_GEM_DOMAIN_GTT; 156 p->relocs[i].allowed_domains = domain; 157 } 158 159 if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) { 160 uint32_t domain = p->relocs[i].prefered_domains; 161 if (!(domain & RADEON_GEM_DOMAIN_GTT)) { 162 DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is " 163 "allowed for userptr BOs\n"); 164 return -EINVAL; 165 } 166 need_mmap_lock = true; 167 domain = RADEON_GEM_DOMAIN_GTT; 168 p->relocs[i].prefered_domains = domain; 169 p->relocs[i].allowed_domains = domain; 170 } 171 172 p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; 173 p->relocs[i].tv.shared = !r->write_domain; 174 175 radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, 176 priority); 177 } 178 179 radeon_cs_buckets_get_list(&buckets, &p->validated); 180 181 if (p->cs_flags & RADEON_CS_USE_VM) 182 p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, 183 &p->validated); 184 #ifdef __NetBSD__ 185 if (need_mmap_lock) 186 vm_map_lock_read(&curproc->p_vmspace->vm_map); 187 #else 188 if (need_mmap_lock) 189 down_read(¤t->mm->mmap_sem); 190 #endif 191 192 r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); 193 194 #ifdef __NetBSD__ 195 if (need_mmap_lock) 196 vm_map_unlock_read(&curproc->p_vmspace->vm_map); 197 #else 198 if (need_mmap_lock) 199 up_read(¤t->mm->mmap_sem); 200 #endif 201 202 return r; 203 } 204 205 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) 206 { 207 p->priority = priority; 208 209 switch (ring) { 210 default: 211 DRM_ERROR("unknown ring id: %d\n", ring); 212 return -EINVAL; 213 case RADEON_CS_RING_GFX: 214 p->ring = RADEON_RING_TYPE_GFX_INDEX; 215 break; 216 case RADEON_CS_RING_COMPUTE: 217 if (p->rdev->family >= CHIP_TAHITI) { 218 if (p->priority > 0) 219 p->ring = CAYMAN_RING_TYPE_CP1_INDEX; 220 else 221 p->ring = CAYMAN_RING_TYPE_CP2_INDEX; 222 } else 223 p->ring = RADEON_RING_TYPE_GFX_INDEX; 224 break; 225 case RADEON_CS_RING_DMA: 226 if (p->rdev->family >= CHIP_CAYMAN) { 227 if (p->priority > 0) 228 p->ring = R600_RING_TYPE_DMA_INDEX; 229 else 230 p->ring = CAYMAN_RING_TYPE_DMA1_INDEX; 231 } else if (p->rdev->family >= CHIP_RV770) { 232 p->ring = R600_RING_TYPE_DMA_INDEX; 233 } else { 234 return -EINVAL; 235 } 236 break; 237 case RADEON_CS_RING_UVD: 238 p->ring = R600_RING_TYPE_UVD_INDEX; 239 break; 240 case RADEON_CS_RING_VCE: 241 /* TODO: only use the low priority ring for now */ 242 p->ring = TN_RING_TYPE_VCE1_INDEX; 243 break; 244 } 245 return 0; 246 } 247 248 static int radeon_cs_sync_rings(struct radeon_cs_parser *p) 249 { 250 struct radeon_bo_list *reloc; 251 int r; 252 253 list_for_each_entry(reloc, &p->validated, tv.head) { 254 struct reservation_object *resv; 255 256 resv = reloc->robj->tbo.resv; 257 r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, 258 reloc->tv.shared); 259 if (r) 260 return r; 261 } 262 return 0; 263 } 264 265 /* XXX: note that this is called from the legacy UMS CS ioctl as well */ 266 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) 267 { 268 struct drm_radeon_cs *cs = data; 269 uint64_t *chunk_array_ptr; 270 unsigned size, i; 271 u32 ring = RADEON_CS_RING_GFX; 272 s32 priority = 0; 273 274 INIT_LIST_HEAD(&p->validated); 275 276 if (!cs->num_chunks) { 277 return 0; 278 } 279 280 /* get chunks */ 281 p->idx = 0; 282 p->ib.sa_bo = NULL; 283 p->const_ib.sa_bo = NULL; 284 p->chunk_ib = NULL; 285 p->chunk_relocs = NULL; 286 p->chunk_flags = NULL; 287 p->chunk_const_ib = NULL; 288 p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); 289 if (p->chunks_array == NULL) { 290 return -ENOMEM; 291 } 292 chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks); 293 if (copy_from_user(p->chunks_array, chunk_array_ptr, 294 sizeof(uint64_t)*cs->num_chunks)) { 295 return -EFAULT; 296 } 297 p->cs_flags = 0; 298 p->nchunks = cs->num_chunks; 299 p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL); 300 if (p->chunks == NULL) { 301 return -ENOMEM; 302 } 303 for (i = 0; i < p->nchunks; i++) { 304 struct drm_radeon_cs_chunk __user **chunk_ptr = NULL; 305 struct drm_radeon_cs_chunk user_chunk; 306 uint32_t __user *cdata; 307 308 chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i]; 309 if (copy_from_user(&user_chunk, chunk_ptr, 310 sizeof(struct drm_radeon_cs_chunk))) { 311 return -EFAULT; 312 } 313 p->chunks[i].length_dw = user_chunk.length_dw; 314 if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) { 315 p->chunk_relocs = &p->chunks[i]; 316 } 317 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) { 318 p->chunk_ib = &p->chunks[i]; 319 /* zero length IB isn't useful */ 320 if (p->chunks[i].length_dw == 0) 321 return -EINVAL; 322 } 323 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) { 324 p->chunk_const_ib = &p->chunks[i]; 325 /* zero length CONST IB isn't useful */ 326 if (p->chunks[i].length_dw == 0) 327 return -EINVAL; 328 } 329 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) { 330 p->chunk_flags = &p->chunks[i]; 331 /* zero length flags aren't useful */ 332 if (p->chunks[i].length_dw == 0) 333 return -EINVAL; 334 } 335 336 size = p->chunks[i].length_dw; 337 cdata = (void __user *)(unsigned long)user_chunk.chunk_data; 338 p->chunks[i].user_ptr = cdata; 339 if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) 340 continue; 341 342 if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) { 343 if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP)) 344 continue; 345 } 346 347 p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); 348 size *= sizeof(uint32_t); 349 if (p->chunks[i].kdata == NULL) { 350 return -ENOMEM; 351 } 352 if (copy_from_user(p->chunks[i].kdata, cdata, size)) { 353 return -EFAULT; 354 } 355 if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) { 356 p->cs_flags = p->chunks[i].kdata[0]; 357 if (p->chunks[i].length_dw > 1) 358 ring = p->chunks[i].kdata[1]; 359 if (p->chunks[i].length_dw > 2) 360 priority = (s32)p->chunks[i].kdata[2]; 361 } 362 } 363 364 /* these are KMS only */ 365 if (p->rdev) { 366 if ((p->cs_flags & RADEON_CS_USE_VM) && 367 !p->rdev->vm_manager.enabled) { 368 DRM_ERROR("VM not active on asic!\n"); 369 return -EINVAL; 370 } 371 372 if (radeon_cs_get_ring(p, ring, priority)) 373 return -EINVAL; 374 375 /* we only support VM on some SI+ rings */ 376 if ((p->cs_flags & RADEON_CS_USE_VM) == 0) { 377 if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) { 378 DRM_ERROR("Ring %d requires VM!\n", p->ring); 379 return -EINVAL; 380 } 381 } else { 382 if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) { 383 DRM_ERROR("VM not supported on ring %d!\n", 384 p->ring); 385 return -EINVAL; 386 } 387 } 388 } 389 390 return 0; 391 } 392 393 static int cmp_size_smaller_first(void *priv, struct list_head *a, 394 struct list_head *b) 395 { 396 struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); 397 struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); 398 399 /* Sort A before B if A is smaller. */ 400 return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; 401 } 402 403 /** 404 * cs_parser_fini() - clean parser states 405 * @parser: parser structure holding parsing context. 406 * @error: error number 407 * 408 * If error is set than unvalidate buffer, otherwise just free memory 409 * used by parsing context. 410 **/ 411 static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff) 412 { 413 unsigned i; 414 415 if (!error) { 416 /* Sort the buffer list from the smallest to largest buffer, 417 * which affects the order of buffers in the LRU list. 418 * This assures that the smallest buffers are added first 419 * to the LRU list, so they are likely to be later evicted 420 * first, instead of large buffers whose eviction is more 421 * expensive. 422 * 423 * This slightly lowers the number of bytes moved by TTM 424 * per frame under memory pressure. 425 */ 426 list_sort(NULL, &parser->validated, cmp_size_smaller_first); 427 428 ttm_eu_fence_buffer_objects(&parser->ticket, 429 &parser->validated, 430 &parser->ib.fence->base); 431 } else if (backoff) { 432 ttm_eu_backoff_reservation(&parser->ticket, 433 &parser->validated); 434 } 435 436 if (parser->relocs != NULL) { 437 for (i = 0; i < parser->nrelocs; i++) { 438 struct radeon_bo *bo = parser->relocs[i].robj; 439 if (bo == NULL) 440 continue; 441 442 drm_gem_object_unreference_unlocked(&bo->gem_base); 443 } 444 } 445 kfree(parser->track); 446 drm_free_large(parser->relocs); 447 drm_free_large(parser->vm_bos); 448 for (i = 0; i < parser->nchunks; i++) 449 drm_free_large(parser->chunks[i].kdata); 450 kfree(parser->chunks); 451 kfree(parser->chunks_array); 452 radeon_ib_free(parser->rdev, &parser->ib); 453 radeon_ib_free(parser->rdev, &parser->const_ib); 454 } 455 456 static int radeon_cs_ib_chunk(struct radeon_device *rdev, 457 struct radeon_cs_parser *parser) 458 { 459 int r; 460 461 if (parser->chunk_ib == NULL) 462 return 0; 463 464 if (parser->cs_flags & RADEON_CS_USE_VM) 465 return 0; 466 467 r = radeon_cs_parse(rdev, parser->ring, parser); 468 if (r || parser->parser_error) { 469 DRM_ERROR("Invalid command stream !\n"); 470 return r; 471 } 472 473 r = radeon_cs_sync_rings(parser); 474 if (r) { 475 if (r != -ERESTARTSYS) 476 DRM_ERROR("Failed to sync rings: %i\n", r); 477 return r; 478 } 479 480 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 481 radeon_uvd_note_usage(rdev); 482 else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || 483 (parser->ring == TN_RING_TYPE_VCE2_INDEX)) 484 radeon_vce_note_usage(rdev); 485 486 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); 487 if (r) { 488 DRM_ERROR("Failed to schedule IB !\n"); 489 } 490 return r; 491 } 492 493 static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, 494 struct radeon_vm *vm) 495 { 496 struct radeon_device *rdev = p->rdev; 497 struct radeon_bo_va *bo_va; 498 int i, r; 499 500 r = radeon_vm_update_page_directory(rdev, vm); 501 if (r) 502 return r; 503 504 r = radeon_vm_clear_freed(rdev, vm); 505 if (r) 506 return r; 507 508 if (vm->ib_bo_va == NULL) { 509 DRM_ERROR("Tmp BO not in VM!\n"); 510 return -EINVAL; 511 } 512 513 r = radeon_vm_bo_update(rdev, vm->ib_bo_va, 514 &rdev->ring_tmp_bo.bo->tbo.mem); 515 if (r) 516 return r; 517 518 for (i = 0; i < p->nrelocs; i++) { 519 struct radeon_bo *bo; 520 521 bo = p->relocs[i].robj; 522 bo_va = radeon_vm_bo_find(vm, bo); 523 if (bo_va == NULL) { 524 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); 525 return -EINVAL; 526 } 527 528 r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); 529 if (r) 530 return r; 531 532 radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update); 533 } 534 535 return radeon_vm_clear_invalids(rdev, vm); 536 } 537 538 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, 539 struct radeon_cs_parser *parser) 540 { 541 struct radeon_fpriv *fpriv = parser->filp->driver_priv; 542 struct radeon_vm *vm = &fpriv->vm; 543 int r; 544 545 if (parser->chunk_ib == NULL) 546 return 0; 547 if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) 548 return 0; 549 550 if (parser->const_ib.length_dw) { 551 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib); 552 if (r) { 553 return r; 554 } 555 } 556 557 r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib); 558 if (r) { 559 return r; 560 } 561 562 if (parser->ring == R600_RING_TYPE_UVD_INDEX) 563 radeon_uvd_note_usage(rdev); 564 565 mutex_lock(&vm->mutex); 566 r = radeon_bo_vm_update_pte(parser, vm); 567 if (r) { 568 goto out; 569 } 570 571 r = radeon_cs_sync_rings(parser); 572 if (r) { 573 if (r != -ERESTARTSYS) 574 DRM_ERROR("Failed to sync rings: %i\n", r); 575 goto out; 576 } 577 578 if ((rdev->family >= CHIP_TAHITI) && 579 (parser->chunk_const_ib != NULL)) { 580 r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true); 581 } else { 582 r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); 583 } 584 585 out: 586 mutex_unlock(&vm->mutex); 587 return r; 588 } 589 590 static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r) 591 { 592 if (r == -EDEADLK) { 593 r = radeon_gpu_reset(rdev); 594 if (!r) 595 r = -EAGAIN; 596 } 597 return r; 598 } 599 600 static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser) 601 { 602 struct radeon_cs_chunk *ib_chunk; 603 struct radeon_vm *vm = NULL; 604 int r; 605 606 if (parser->chunk_ib == NULL) 607 return 0; 608 609 if (parser->cs_flags & RADEON_CS_USE_VM) { 610 struct radeon_fpriv *fpriv = parser->filp->driver_priv; 611 vm = &fpriv->vm; 612 613 if ((rdev->family >= CHIP_TAHITI) && 614 (parser->chunk_const_ib != NULL)) { 615 ib_chunk = parser->chunk_const_ib; 616 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { 617 DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw); 618 return -EINVAL; 619 } 620 r = radeon_ib_get(rdev, parser->ring, &parser->const_ib, 621 vm, ib_chunk->length_dw * 4); 622 if (r) { 623 DRM_ERROR("Failed to get const ib !\n"); 624 return r; 625 } 626 parser->const_ib.is_const_ib = true; 627 parser->const_ib.length_dw = ib_chunk->length_dw; 628 if (copy_from_user(parser->const_ib.ptr, 629 ib_chunk->user_ptr, 630 ib_chunk->length_dw * 4)) 631 return -EFAULT; 632 } 633 634 ib_chunk = parser->chunk_ib; 635 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) { 636 DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw); 637 return -EINVAL; 638 } 639 } 640 ib_chunk = parser->chunk_ib; 641 642 r = radeon_ib_get(rdev, parser->ring, &parser->ib, 643 vm, ib_chunk->length_dw * 4); 644 if (r) { 645 DRM_ERROR("Failed to get ib !\n"); 646 return r; 647 } 648 parser->ib.length_dw = ib_chunk->length_dw; 649 if (ib_chunk->kdata) 650 memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4); 651 else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4)) 652 return -EFAULT; 653 return 0; 654 } 655 656 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 657 { 658 struct radeon_device *rdev = dev->dev_private; 659 struct radeon_cs_parser parser; 660 int r; 661 662 down_read(&rdev->exclusive_lock); 663 if (!rdev->accel_working) { 664 up_read(&rdev->exclusive_lock); 665 return -EBUSY; 666 } 667 if (rdev->in_reset) { 668 up_read(&rdev->exclusive_lock); 669 r = radeon_gpu_reset(rdev); 670 if (!r) 671 r = -EAGAIN; 672 return r; 673 } 674 /* initialize parser */ 675 memset(&parser, 0, sizeof(struct radeon_cs_parser)); 676 parser.filp = filp; 677 parser.rdev = rdev; 678 parser.dev = rdev->dev; 679 parser.family = rdev->family; 680 r = radeon_cs_parser_init(&parser, data); 681 if (r) { 682 DRM_ERROR("Failed to initialize parser !\n"); 683 radeon_cs_parser_fini(&parser, r, false); 684 up_read(&rdev->exclusive_lock); 685 r = radeon_cs_handle_lockup(rdev, r); 686 return r; 687 } 688 689 r = radeon_cs_ib_fill(rdev, &parser); 690 if (!r) { 691 r = radeon_cs_parser_relocs(&parser); 692 if (r && r != -ERESTARTSYS) 693 DRM_ERROR("Failed to parse relocation %d!\n", r); 694 } 695 696 if (r) { 697 radeon_cs_parser_fini(&parser, r, false); 698 up_read(&rdev->exclusive_lock); 699 r = radeon_cs_handle_lockup(rdev, r); 700 return r; 701 } 702 703 trace_radeon_cs(&parser); 704 705 r = radeon_cs_ib_chunk(rdev, &parser); 706 if (r) { 707 goto out; 708 } 709 r = radeon_cs_ib_vm_chunk(rdev, &parser); 710 if (r) { 711 goto out; 712 } 713 out: 714 radeon_cs_parser_fini(&parser, r, true); 715 up_read(&rdev->exclusive_lock); 716 r = radeon_cs_handle_lockup(rdev, r); 717 return r; 718 } 719 720 /** 721 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet 722 * @parser: parser structure holding parsing context. 723 * @pkt: where to store packet information 724 * 725 * Assume that chunk_ib_index is properly set. Will return -EINVAL 726 * if packet is bigger than remaining ib size. or if packets is unknown. 727 **/ 728 int radeon_cs_packet_parse(struct radeon_cs_parser *p, 729 struct radeon_cs_packet *pkt, 730 unsigned idx) 731 { 732 struct radeon_cs_chunk *ib_chunk = p->chunk_ib; 733 struct radeon_device *rdev = p->rdev; 734 uint32_t header; 735 int ret = 0, i; 736 737 if (idx >= ib_chunk->length_dw) { 738 DRM_ERROR("Can not parse packet at %d after CS end %d !\n", 739 idx, ib_chunk->length_dw); 740 return -EINVAL; 741 } 742 header = radeon_get_ib_value(p, idx); 743 pkt->idx = idx; 744 pkt->type = RADEON_CP_PACKET_GET_TYPE(header); 745 pkt->count = RADEON_CP_PACKET_GET_COUNT(header); 746 pkt->one_reg_wr = 0; 747 switch (pkt->type) { 748 case RADEON_PACKET_TYPE0: 749 if (rdev->family < CHIP_R600) { 750 pkt->reg = R100_CP_PACKET0_GET_REG(header); 751 pkt->one_reg_wr = 752 RADEON_CP_PACKET0_GET_ONE_REG_WR(header); 753 } else 754 pkt->reg = R600_CP_PACKET0_GET_REG(header); 755 break; 756 case RADEON_PACKET_TYPE3: 757 pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header); 758 break; 759 case RADEON_PACKET_TYPE2: 760 pkt->count = -1; 761 break; 762 default: 763 DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx); 764 ret = -EINVAL; 765 goto dump_ib; 766 } 767 if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) { 768 DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n", 769 pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw); 770 ret = -EINVAL; 771 goto dump_ib; 772 } 773 return 0; 774 775 dump_ib: 776 for (i = 0; i < ib_chunk->length_dw; i++) { 777 if (i == idx) 778 printk("\t0x%08x <---\n", radeon_get_ib_value(p, i)); 779 else 780 printk("\t0x%08x\n", radeon_get_ib_value(p, i)); 781 } 782 return ret; 783 } 784 785 /** 786 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP 787 * @p: structure holding the parser context. 788 * 789 * Check if the next packet is NOP relocation packet3. 790 **/ 791 bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) 792 { 793 struct radeon_cs_packet p3reloc; 794 int r; 795 796 r = radeon_cs_packet_parse(p, &p3reloc, p->idx); 797 if (r) 798 return false; 799 if (p3reloc.type != RADEON_PACKET_TYPE3) 800 return false; 801 if (p3reloc.opcode != RADEON_PACKET3_NOP) 802 return false; 803 return true; 804 } 805 806 /** 807 * radeon_cs_dump_packet() - dump raw packet context 808 * @p: structure holding the parser context. 809 * @pkt: structure holding the packet. 810 * 811 * Used mostly for debugging and error reporting. 812 **/ 813 void radeon_cs_dump_packet(struct radeon_cs_parser *p, 814 struct radeon_cs_packet *pkt) 815 { 816 volatile uint32_t *ib; 817 unsigned i; 818 unsigned idx; 819 820 ib = p->ib.ptr; 821 idx = pkt->idx; 822 for (i = 0; i <= (pkt->count + 1); i++, idx++) 823 DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]); 824 } 825 826 /** 827 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet 828 * @parser: parser structure holding parsing context. 829 * @data: pointer to relocation data 830 * @offset_start: starting offset 831 * @offset_mask: offset mask (to align start offset on) 832 * @reloc: reloc informations 833 * 834 * Check if next packet is relocation packet3, do bo validation and compute 835 * GPU offset using the provided start. 836 **/ 837 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, 838 struct radeon_bo_list **cs_reloc, 839 int nomm) 840 { 841 struct radeon_cs_chunk *relocs_chunk; 842 struct radeon_cs_packet p3reloc; 843 unsigned idx; 844 int r; 845 846 if (p->chunk_relocs == NULL) { 847 DRM_ERROR("No relocation chunk !\n"); 848 return -EINVAL; 849 } 850 *cs_reloc = NULL; 851 relocs_chunk = p->chunk_relocs; 852 r = radeon_cs_packet_parse(p, &p3reloc, p->idx); 853 if (r) 854 return r; 855 p->idx += p3reloc.count + 2; 856 if (p3reloc.type != RADEON_PACKET_TYPE3 || 857 p3reloc.opcode != RADEON_PACKET3_NOP) { 858 DRM_ERROR("No packet3 for relocation for packet at %d.\n", 859 p3reloc.idx); 860 radeon_cs_dump_packet(p, &p3reloc); 861 return -EINVAL; 862 } 863 idx = radeon_get_ib_value(p, p3reloc.idx + 1); 864 if (idx >= relocs_chunk->length_dw) { 865 DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", 866 idx, relocs_chunk->length_dw); 867 radeon_cs_dump_packet(p, &p3reloc); 868 return -EINVAL; 869 } 870 /* FIXME: we assume reloc size is 4 dwords */ 871 if (nomm) { 872 *cs_reloc = p->relocs; 873 (*cs_reloc)->gpu_offset = 874 (u64)relocs_chunk->kdata[idx + 3] << 32; 875 (*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0]; 876 } else 877 *cs_reloc = &p->relocs[(idx / 4)]; 878 return 0; 879 } 880