/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_execbuffer.c,v 1.3 2012/05/28 13:58:08 kib Exp $
 */

#include <sys/limits.h>
#include <sys/sfbuf.h>

#include <dev/drm/drmP.h>
#include <dev/drm/drm.h>
#include "i915_drm.h"
#include "i915_drv.h"
#include "intel_drv.h"

struct change_domains {
        uint32_t invalidate_domains;
        uint32_t flush_domains;
        uint32_t flush_rings;
        uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *      1. Allocated
 *      2. Written by CPU
 *      3. Mapped to GTT
 *      4. Read by GPU
 *      5. Unmapped from GTT
 *      6. Freed
 *
 *      Let's take these a step at a time
 *
 *      1. Allocated
 *              Pages allocated from the kernel may still have
 *              cache contents, so we set them to (CPU, CPU) always.
 *      2. Written by CPU (using pwrite)
 *              The pwrite function calls set_domain (CPU, CPU) and
 *              this function does nothing (as nothing changes)
 *      3. Mapped by GTT
 *              This function asserts that the object is not
 *              currently in any GPU-based read or write domains
 *      4. Read by GPU
 *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *              As write_domain is zero, this function adds in the
 *              current read domains (CPU+COMMAND, 0).
 *              flush_domains is set to CPU.
 *              invalidate_domains is set to COMMAND
 *              clflush is run to get data out of the CPU caches
 *              then i915_dev_set_domain calls i915_gem_flush to
 *              emit an MI_FLUSH and drm_agp_chipset_flush
 *      5. Unmapped from GTT
 *              i915_gem_object_unbind calls set_domain (CPU, CPU)
 *              flush_domains and invalidate_domains end up both zero
 *              so no flushing/invalidating happens
 *      6. Freed
 *              yay, done
 *
 * Case 2: The shared render buffer
 *
 *      1. Allocated
 *      2. Mapped to GTT
 *      3. Read/written by GPU
 *      4. set_domain to (CPU,CPU)
 *      5. Read/written by CPU
 *      6. Read/written by GPU
 *
 *      1. Allocated
 *              Same as last example, (CPU, CPU)
 *      2. Mapped to GTT
 *              Nothing changes (assertions find that it is not in the GPU)
 *      3. Read/written by GPU
 *              execbuffer calls set_domain (RENDER, RENDER)
 *              flush_domains gets CPU
 *              invalidate_domains gets GPU
 *              clflush (obj)
 *              MI_FLUSH and drm_agp_chipset_flush
 *      4. set_domain (CPU, CPU)
 *              flush_domains gets GPU
 *              invalidate_domains gets CPU
 *              wait_rendering (obj) to make sure all drawing is complete.
 *              This will include an MI_FLUSH to get the data from GPU
 *              to memory
 *              clflush (obj) to invalidate the CPU cache
 *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *      5. Read/written by CPU
 *              cache lines are loaded and dirtied
 *      6. Read/written by GPU
 *              Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *      1. Allocated
 *      2. Written by CPU
 *      3. Read by GPU
 *      4. Updated (written) by CPU again
 *      5. Read by GPU
 *
 *      1. Allocated
 *              (CPU, CPU)
 *      2. Written by CPU
 *              (CPU, CPU)
 *      3. Read by GPU
 *              (CPU+RENDER, 0)
 *              flush_domains = CPU
 *              invalidate_domains = RENDER
 *              clflush (obj)
 *              MI_FLUSH
 *              drm_agp_chipset_flush
 *      4. Updated (written) by CPU again
 *              (CPU, CPU)
 *              flush_domains = 0 (no previous write domain)
 *              invalidate_domains = 0 (no new read domains)
 *      5. Read by GPU
 *              (CPU+RENDER, 0)
 *              flush_domains = CPU
 *              invalidate_domains = RENDER
 *              clflush (obj)
 *              MI_FLUSH
 *              drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
                                  struct intel_ring_buffer *ring,
                                  struct change_domains *cd)
{
        uint32_t invalidate_domains = 0, flush_domains = 0;

        /*
         * If the object isn't moving to a new write domain,
         * let the object stay in multiple read domains
         */
        if (obj->base.pending_write_domain == 0)
                obj->base.pending_read_domains |= obj->base.read_domains;

        /*
         * Flush the current write domain if
         * the new read domains don't match. Invalidate
         * any read domains which differ from the old
         * write domain
         */
        if (obj->base.write_domain &&
            (((obj->base.write_domain != obj->base.pending_read_domains ||
               obj->ring != ring)) ||
             (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
                flush_domains |= obj->base.write_domain;
                invalidate_domains |=
                        obj->base.pending_read_domains & ~obj->base.write_domain;
        }
        /*
         * Invalidate any read caches which may have
         * stale data. That is, any new read domains.
         */
        invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
                i915_gem_clflush_object(obj);

        if (obj->base.pending_write_domain)
                cd->flips |= atomic_load_acq_int(&obj->pending_flip);

        /* The actual obj->write_domain will be updated with
         * pending_write_domain after we emit the accumulated flush for all
         * of our domain changes in execbuffers (which clears objects'
         * write_domains). So if we have a current write domain that we
         * aren't changing, set pending_write_domain to that.
         */
        if (flush_domains == 0 && obj->base.pending_write_domain == 0)
                obj->base.pending_write_domain = obj->base.write_domain;

        cd->invalidate_domains |= invalidate_domains;
        cd->flush_domains |= flush_domains;
        if (flush_domains & I915_GEM_GPU_DOMAINS)
                cd->flush_rings |= intel_ring_flag(obj->ring);
        if (invalidate_domains & I915_GEM_GPU_DOMAINS)
                cd->flush_rings |= intel_ring_flag(ring);
}

struct eb_objects {
        u_long hashmask;
        LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
        struct eb_objects *eb;

        eb = kmalloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
        eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
        return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
        int i;

        for (i = 0; i <= eb->hashmask; i++)
                LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

        LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
            obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
        struct drm_i915_gem_object *obj;

        LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
                if (obj->exec_handle == handle)
                        return (obj);
        }
        return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

        drm_free(eb->buckets, DRM_I915_GEM);
        drm_free(eb, DRM_I915_GEM);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                                   struct eb_objects *eb,
                                   struct drm_i915_gem_relocation_entry *reloc)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_gem_object *target_obj;
        uint32_t target_offset;
        int ret = -EINVAL;

        /* we already hold a reference to all valid objects */
        target_obj = &eb_get_object(eb, reloc->target_handle)->base;
        if (unlikely(target_obj == NULL))
                return -ENOENT;

        target_offset = to_intel_bo(target_obj)->gtt_offset;

#if WATCH_RELOC
        DRM_INFO("%s: obj %p offset %08x target %d "
                 "read %08x write %08x gtt %08x "
                 "presumed %08x delta %08x\n",
                 __func__,
                 obj,
                 (int) reloc->offset,
                 (int) reloc->target_handle,
                 (int) reloc->read_domains,
                 (int) reloc->write_domain,
                 (int) target_offset,
                 (int) reloc->presumed_offset,
                 reloc->delta);
#endif

        /* The target buffer should have appeared before us in the
         * exec_object list, so it should have a GTT space bound by now.
         */
        if (unlikely(target_offset == 0)) {
                DRM_DEBUG("No GTT space found for object %d\n",
                          reloc->target_handle);
                return ret;
        }

        /* Validate that the target is in a valid r/w GPU domain */
        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
                DRM_DEBUG("reloc with multiple write domains: "
                          "obj %p target %d offset %d "
                          "read %08x write %08x",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
                return ret;
        }
        if (unlikely((reloc->write_domain | reloc->read_domains)
                     & ~I915_GEM_GPU_DOMAINS)) {
                DRM_DEBUG("reloc with read/write non-GPU domains: "
                          "obj %p target %d offset %d "
                          "read %08x write %08x",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          reloc->read_domains,
                          reloc->write_domain);
                return ret;
        }
        if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
                     reloc->write_domain != target_obj->pending_write_domain)) {
                DRM_DEBUG("Write domain conflict: "
                          "obj %p target %d offset %d "
                          "new %08x old %08x\n",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          reloc->write_domain,
                          target_obj->pending_write_domain);
                return ret;
        }

        target_obj->pending_read_domains |= reloc->read_domains;
        target_obj->pending_write_domain |= reloc->write_domain;

        /* If the relocation already has the right value in it, no
         * more work needs to be done.
         */
        if (target_offset == reloc->presumed_offset)
                return 0;

        /* Check that the relocation address is valid... */
        if (unlikely(reloc->offset > obj->base.size - 4)) {
                DRM_DEBUG("Relocation beyond object bounds: "
                          "obj %p target %d offset %d size %d.\n",
                          obj, reloc->target_handle,
                          (int) reloc->offset,
                          (int) obj->base.size);
                return ret;
        }
        if (unlikely(reloc->offset & 3)) {
                DRM_DEBUG("Relocation not 4-byte aligned: "
                          "obj %p target %d offset %d.\n",
                          obj, reloc->target_handle,
                          (int) reloc->offset);
                return ret;
        }

        reloc->delta += target_offset;
        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
                uint32_t page_offset = reloc->offset & PAGE_MASK;
                char *vaddr;
                struct sf_buf *sf;

                sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)]);
                if (sf == NULL)
                        return (-ENOMEM);
                vaddr = (void *)sf_buf_kva(sf);
                *(uint32_t *)(vaddr + page_offset) = reloc->delta;
                sf_buf_free(sf);
        } else {
                uint32_t *reloc_entry;
                char *reloc_page;

                /* We can't wait for rendering with pagefaults disabled */
                if (obj->active && (curthread->td_flags & TDF_NOFAULT))
                        return (-EFAULT);
                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
                if (ret)
                        return ret;

                /*
                 * Map the page containing the relocation we're going
                 * to perform.
                 */
                reloc->offset += obj->gtt_offset;
                reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
                    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
                reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
                    PAGE_MASK));
                *(volatile uint32_t *)reloc_entry = reloc->delta;
                pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
        }

        /* and update the user's relocation entry */
        reloc->presumed_offset = target_offset;

        return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
                                    struct eb_objects *eb)
{
        struct drm_i915_gem_relocation_entry *user_relocs;
        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
        struct drm_i915_gem_relocation_entry reloc;
        int i, ret;

        user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
        for (i = 0; i < entry->relocation_count; i++) {
                ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
                if (ret != 0)
                        return (ret);

                ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
                if (ret != 0)
                        return (ret);

                ret = -copyout_nofault(&reloc.presumed_offset,
                    &user_relocs[i].presumed_offset,
                    sizeof(reloc.presumed_offset));
                if (ret != 0)
                        return (ret);
        }

        return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
        const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
        int i, ret;

        for (i = 0; i < entry->relocation_count; i++) {
                ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
                if (ret)
                        return ret;
        }

        return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
                             struct eb_objects *eb,
                             struct list_head *objects)
{
        struct drm_i915_gem_object *obj;
        thread_t td = curthread;
        int ret;
        int pflags;

        /* Try to move as many of the relocation targets off the active list
         * to avoid unnecessary fallbacks to the slow path, as we cannot wait
         * for the retirement with pagefaults disabled.
         */
        i915_gem_retire_requests(dev);

        ret = 0;
        pflags = td->td_flags & TDF_NOFAULT;
        atomic_set_int(&td->td_flags, TDF_NOFAULT);

        /* This is the fast path and we cannot handle a pagefault whilst
         * holding the device lock lest the user pass in the relocations
         * contained within a mmaped bo, for in such a case the page
         * fault handler would call i915_gem_fault() and we would try to
         * acquire the device lock again. Obviously this is bad.
         */

        list_for_each_entry(obj, objects, exec_list) {
                ret = i915_gem_execbuffer_relocate_object(obj, eb);
                if (ret != 0)
                        break;
        }

        if ((pflags & TDF_NOFAULT) == 0)
                atomic_clear_int(&td->td_flags, TDF_NOFAULT);

        return (ret);
}

#define __EXEC_OBJECT_HAS_FENCE (1<<31)

static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
                     struct intel_ring_buffer *ring)
{
        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
        bool need_fence, need_mappable;
        int ret;

        need_fence =
                has_fenced_gpu_access &&
                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                obj->tiling_mode != I915_TILING_NONE;
        need_mappable =
                entry->relocation_count ? true : need_fence;

        ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
        if (ret)
                return ret;

        if (has_fenced_gpu_access) {
                if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
                        if (obj->tiling_mode) {
                                ret = i915_gem_object_get_fence(obj, ring);
                                if (ret)
                                        goto err_unpin;

                                entry->flags |= __EXEC_OBJECT_HAS_FENCE;
                                i915_gem_object_pin_fence(obj);
                        } else {
                                ret = i915_gem_object_put_fence(obj);
                                if (ret)
                                        goto err_unpin;
                        }
                        obj->pending_fenced_gpu_access = true;
                }
        }

        entry->offset = obj->gtt_offset;
        return 0;

err_unpin:
        i915_gem_object_unpin(obj);
        return ret;
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                            struct drm_file *file,
                            struct list_head *objects)
{
        drm_i915_private_t *dev_priv;
        struct drm_i915_gem_object *obj;
        int ret, retry;
        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
        struct list_head ordered_objects;

        dev_priv = ring->dev->dev_private;
        INIT_LIST_HEAD(&ordered_objects);
        while (!list_empty(objects)) {
                struct drm_i915_gem_exec_object2 *entry;
                bool need_fence, need_mappable;

                obj = list_first_entry(objects,
                                       struct drm_i915_gem_object,
                                       exec_list);
                entry = obj->exec_entry;

                need_fence =
                        has_fenced_gpu_access &&
                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                        obj->tiling_mode != I915_TILING_NONE;
                need_mappable =
                        entry->relocation_count ? true : need_fence;

                if (need_mappable)
                        list_move(&obj->exec_list, &ordered_objects);
                else
                        list_move_tail(&obj->exec_list, &ordered_objects);

                obj->base.pending_read_domains = 0;
                obj->base.pending_write_domain = 0;
        }
        list_splice(&ordered_objects, objects);

        /* Attempt to pin all of the buffers into the GTT.
         * This is done in 3 phases:
         *
         * 1a. Unbind all objects that do not match the GTT constraints for
         *     the execbuffer (fenceable, mappable, alignment etc).
         * 1b. Increment pin count for already bound objects and obtain
         *     a fence register if required.
         * 2.  Bind new objects.
         * 3.  Decrement pin count.
         *
         * This avoids unnecessary unbinding of later objects in order to make
         * room for the earlier objects *unless* we need to defragment.
         */
        retry = 0;
        do {
                ret = 0;

                /* Unbind any ill-fitting objects or pin. */
                list_for_each_entry(obj, objects, exec_list) {
                        struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
                        bool need_fence, need_mappable;

                        if (!obj->gtt_space)
                                continue;

                        need_fence =
                                has_fenced_gpu_access &&
                                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                                obj->tiling_mode != I915_TILING_NONE;
                        need_mappable =
                                entry->relocation_count ? true : need_fence;

                        if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
                            (need_mappable && !obj->map_and_fenceable))
                                ret = i915_gem_object_unbind(obj);
                        else
                                ret = pin_and_fence_object(obj, ring);
                        if (ret)
                                goto err;
                }

                /* Bind fresh objects */
                list_for_each_entry(obj, objects, exec_list) {
                        if (obj->gtt_space)
                                continue;

                        ret = pin_and_fence_object(obj, ring);
                        if (ret) {
                                int ret_ignore;

                                /* This can potentially raise a harmless
                                 * -EINVAL if we failed to bind in the above
                                 * call. It cannot raise -EINTR since we know
                                 * that the bo is freshly bound and so will
                                 * not need to be flushed or waited upon.
                                 */
                                ret_ignore = i915_gem_object_unbind(obj);
                                (void)ret_ignore;
                                if (obj->gtt_space != NULL)
                                        kprintf("%s: gtt_space\n", __func__);
                                break;
                        }
                }

                /* Decrement pin count for bound objects */
                list_for_each_entry(obj, objects, exec_list) {
                        struct drm_i915_gem_exec_object2 *entry;

                        if (!obj->gtt_space)
                                continue;

                        entry = obj->exec_entry;
                        if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
                                i915_gem_object_unpin_fence(obj);
                                entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
                        }

                        i915_gem_object_unpin(obj);

                        /* ... and ensure the ppgtt mapping exists if needed. */
                        if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
                                i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
                                                       obj, obj->cache_level);

                                obj->has_aliasing_ppgtt_mapping = 1;
                        }
                }

                if (ret != -ENOSPC || retry > 1)
                        return ret;

                /* First attempt, just clear anything that is purgeable.
                 * Second attempt, clear the entire GTT.
                 */
                ret = i915_gem_evict_everything(ring->dev, retry == 0);
                if (ret)
                        return ret;

                retry++;
        } while (1);

err:
        list_for_each_entry_continue_reverse(obj, objects, exec_list) {
                struct drm_i915_gem_exec_object2 *entry;

                if (!obj->gtt_space)
                        continue;

                entry = obj->exec_entry;
                if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
                        i915_gem_object_unpin_fence(obj);
                        entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
                }

                i915_gem_object_unpin(obj);
        }

        return ret;
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
        struct drm_i915_gem_relocation_entry *reloc;
        struct drm_i915_gem_object *obj;
        int *reloc_offset;
        int i, total, ret;

        /* We may process another execbuffer during the unlock... */
        while (!list_empty(objects)) {
                obj = list_first_entry(objects,
                                       struct drm_i915_gem_object,
                                       exec_list);
                list_del_init(&obj->exec_list);
                drm_gem_object_unreference(&obj->base);
        }

        DRM_UNLOCK(dev);

        total = 0;
        for (i = 0; i < count; i++)
                total += exec[i].relocation_count;

        reloc_offset = kmalloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
            M_WAITOK | M_ZERO);
        reloc = kmalloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

        total = 0;
        for (i = 0; i < count; i++) {
                struct drm_i915_gem_relocation_entry *user_relocs;

                user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
                ret = -copyin(user_relocs, reloc + total,
                    exec[i].relocation_count * sizeof(*reloc));
                if (ret != 0) {
                        DRM_LOCK(dev);
                        goto err;
                }

                reloc_offset[i] = total;
                total += exec[i].relocation_count;
        }

        ret = i915_mutex_lock_interruptible(dev);
        if (ret) {
                DRM_LOCK(dev);
                goto err;
        }

        /* reacquire the objects */
        eb_reset(eb);
        for (i = 0; i < count; i++) {
                struct drm_i915_gem_object *obj;

                obj = to_intel_bo(drm_gem_object_lookup(dev, file,
                    exec[i].handle));
                if (&obj->base == NULL) {
                        DRM_DEBUG("Invalid object handle %d at index %d\n",
                                  exec[i].handle, i);
                        ret = -ENOENT;
                        goto err;
                }

                list_add_tail(&obj->exec_list, objects);
                obj->exec_handle = exec[i].handle;
                obj->exec_entry = &exec[i];
                eb_add_object(eb, obj);
        }

        ret = i915_gem_execbuffer_reserve(ring, file, objects);
        if (ret)
                goto err;

        list_for_each_entry(obj, objects, exec_list) {
                int offset = obj->exec_entry - exec;
                ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
                    reloc + reloc_offset[offset]);
                if (ret)
                        goto err;
        }

        /* Leave the user relocations as are, this is the painfully slow path,
         * and we want to avoid the complication of dropping the lock whilst
         * having buffers reserved in the aperture and so causing spurious
         * ENOSPC for random operations.
         */

err:
        drm_free(reloc, DRM_I915_GEM);
        drm_free(reloc_offset, DRM_I915_GEM);
        return ret;
}

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
                          uint32_t invalidate_domains,
                          uint32_t flush_domains,
                          uint32_t flush_rings)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        int i, ret;

        if (flush_domains & I915_GEM_DOMAIN_CPU)
                intel_gtt_chipset_flush();

        if (flush_domains & I915_GEM_DOMAIN_GTT)
                cpu_sfence();

        if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
                for (i = 0; i < I915_NUM_RINGS; i++)
                        if (flush_rings & (1 << i)) {
                                ret = i915_gem_flush_ring(&dev_priv->rings[i],
                                    invalidate_domains, flush_domains);
                                if (ret)
                                        return ret;
                        }
        }

        return 0;
}

static bool
intel_enable_semaphores(struct drm_device *dev)
{
        if (INTEL_INFO(dev)->gen < 6)
                return 0;

        if (i915_semaphores >= 0)
                return i915_semaphores;

        /* Enable semaphores on SNB when IO remapping is off */
        if (INTEL_INFO(dev)->gen == 6)
                return !intel_iommu_enabled;

        return 1;
}

static int
i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
                               struct intel_ring_buffer *to)
{
        struct intel_ring_buffer *from = obj->ring;
        u32 seqno;
        int ret, idx;

        if (from == NULL || to == from)
                return 0;

        /* XXX gpu semaphores are implicated in various hard hangs on SNB */
        if (!intel_enable_semaphores(obj->base.dev))
                return i915_gem_object_wait_rendering(obj);

        idx = intel_ring_sync_index(from, to);

        seqno = obj->last_rendering_seqno;
        if (seqno <= from->sync_seqno[idx])
                return 0;

        if (seqno == from->outstanding_lazy_request) {
                struct drm_i915_gem_request *request;

                request = kmalloc(sizeof(*request), DRM_I915_GEM,
                    M_WAITOK | M_ZERO);
                ret = i915_add_request(from, NULL, request);
                if (ret) {
                        drm_free(request, DRM_I915_GEM);
                        return ret;
                }

                seqno = request->seqno;
        }

        from->sync_seqno[idx] = seqno;

        return to->sync_to(to, from, seqno - 1);
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
        u32 plane, flip_mask;
        int ret;

        /* Check for any pending flips. As we only maintain a flip queue depth
         * of 1, we can simply insert a WAIT for the next display flip prior
         * to executing the batch and avoid stalling the CPU.
         */

        for (plane = 0; flips >> plane; plane++) {
                if (((flips >> plane) & 1) == 0)
                        continue;

                if (plane)
                        flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
                else
                        flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

                ret = intel_ring_begin(ring, 2);
                if (ret)
                        return ret;

                intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
                intel_ring_emit(ring, MI_NOOP);
                intel_ring_advance(ring);
        }

        return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                struct list_head *objects)
{
        struct drm_i915_gem_object *obj;
        struct change_domains cd;
        int ret;

        memset(&cd, 0, sizeof(cd));
        list_for_each_entry(obj, objects, exec_list)
                i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

        if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
                DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
                         __func__,
                         cd.invalidate_domains,
                         cd.flush_domains);
#endif
                ret = i915_gem_execbuffer_flush(ring->dev,
                                                cd.invalidate_domains,
                                                cd.flush_domains,
                                                cd.flush_rings);
                if (ret)
                        return ret;
        }

        if (cd.flips) {
                ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
                if (ret)
                        return ret;
        }

        list_for_each_entry(obj, objects, exec_list) {
                ret = i915_gem_execbuffer_sync_rings(obj, ring);
                if (ret)
                        return ret;
        }

        return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
        return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map)
{
        vm_page_t *ma;
        int i, length, page_count;

        /* XXXKIB various limits checking is missing there */
        *map = kmalloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
        for (i = 0; i < count; i++) {
                /* First check for malicious input causing overflow */
                if (exec[i].relocation_count >
                    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
                        return -EINVAL;

                length = exec[i].relocation_count *
                    sizeof(struct drm_i915_gem_relocation_entry);
                if (length == 0) {
                        (*map)[i] = NULL;
                        continue;
                }
                /*
                 * Since both the start and the end of the relocation region
                 * may not be aligned on a page boundary, be
                 * conservative and request a page slot for each
                 * partial page. Thus +2.
                 */
                page_count = howmany(length, PAGE_SIZE) + 2;
                ma = (*map)[i] = kmalloc(page_count * sizeof(vm_page_t),
                    DRM_I915_GEM, M_WAITOK | M_ZERO);
                if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
                    exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
                    ma, page_count) == -1) {
                        drm_free(ma, DRM_I915_GEM);
                        (*map)[i] = NULL;
                        return (-EFAULT);
                }
        }

        return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
                                   struct intel_ring_buffer *ring,
                                   u32 seqno)
{
        struct drm_i915_gem_object *obj;
        uint32_t old_read, old_write;

        list_for_each_entry(obj, objects, exec_list) {
                old_read = obj->base.read_domains;
                old_write = obj->base.write_domain;

                obj->base.read_domains = obj->base.pending_read_domains;
                obj->base.write_domain = obj->base.pending_write_domain;
                obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

                i915_gem_object_move_to_active(obj, ring, seqno);
                if (obj->base.write_domain) {
                        obj->dirty = 1;
                        obj->pending_gpu_write = true;
                        list_move_tail(&obj->gpu_write_list,
                                       &ring->gpu_write_list);
                        intel_mark_busy(ring->dev, obj);
                }
        }
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                                    struct drm_file *file,
                                    struct intel_ring_buffer *ring)
{
        struct drm_i915_gem_request *request;
        u32 invalidate;

        /*
         * Ensure that the commands in the batch buffer are
         * finished before the interrupt fires.
         *
         * The sampler always gets flushed on i965 (sigh).
         */
        invalidate = I915_GEM_DOMAIN_COMMAND;
        if (INTEL_INFO(dev)->gen >= 4)
                invalidate |= I915_GEM_DOMAIN_SAMPLER;
        if (ring->flush(ring, invalidate, 0)) {
                i915_gem_next_request_seqno(ring);
                return;
        }

        /* Add a breadcrumb for the completion of the batch buffer */
        request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
        if (request == NULL || i915_add_request(ring, file, request)) {
                i915_gem_next_request_seqno(ring);
                drm_free(request, DRM_I915_GEM);
        } else if (i915_gem_sync_exec_requests)
                i915_wait_request(ring, request->seqno, true);
}

static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
        char *mkva;
        uint64_t po_r, po_w;
        uint32_t cmd;

        po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
            batch_start_offset + batch_len;
        if (batch_len > 0)
                po_r -= 4;
        mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
            PAT_WRITE_COMBINING);
        po_r &= PAGE_MASK;
        cmd = *(uint32_t *)(mkva + po_r);

        if (cmd != MI_BATCH_BUFFER_END) {
                /*
                 * batch_len != 0 due to the check at the start of
                 * i915_gem_do_execbuffer
                 */
                if (batch_obj->base.size > batch_start_offset + batch_len) {
                        po_w = po_r + 4;
                        /* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
                } else {
                        po_w = po_r;
                        DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
                }
                *(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
        }

        pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
                            struct intel_ring_buffer *ring)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        int ret, i;

        if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
                return 0;

        ret = intel_ring_begin(ring, 4 * 3);
        if (ret)
                return ret;

        for (i = 0; i < 4; i++) {
                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
                intel_ring_emit(ring, 0);
        }

        intel_ring_advance(ring);

        return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                       struct drm_file *file,
                       struct drm_i915_gem_execbuffer2 *args,
                       struct drm_i915_gem_exec_object2 *exec)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct list_head objects;
        struct eb_objects *eb;
        struct drm_i915_gem_object *batch_obj;
        struct drm_clip_rect *cliprects = NULL;
        struct intel_ring_buffer *ring;
        vm_page_t **relocs_ma;
        u32 exec_start, exec_len;
        u32 seqno;
        u32 mask;
        int ret, mode, i;

        if (!i915_gem_check_execbuffer(args)) {
                DRM_DEBUG("execbuf with invalid offset/length\n");
                return -EINVAL;
        }

        if (args->batch_len == 0)
                return (0);

        ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
        if (ret != 0)
                goto pre_struct_lock_err;

        switch (args->flags & I915_EXEC_RING_MASK) {
        case I915_EXEC_DEFAULT:
        case I915_EXEC_RENDER:
                ring = &dev_priv->rings[RCS];
                break;
        case I915_EXEC_BSD:
                if (!HAS_BSD(dev)) {
                        DRM_DEBUG("execbuf with invalid ring (BSD)\n");
                        ret = -EINVAL;
                        goto pre_struct_lock_err;
                }
                ring = &dev_priv->rings[VCS];
                break;
        case I915_EXEC_BLT:
                if (!HAS_BLT(dev)) {
                        DRM_DEBUG("execbuf with invalid ring (BLT)\n");
                        ret = -EINVAL;
                        goto pre_struct_lock_err;
                }
                ring = &dev_priv->rings[BCS];
                break;
        default:
                DRM_DEBUG("execbuf with unknown ring: %d\n",
                          (int)(args->flags & I915_EXEC_RING_MASK));
                ret = -EINVAL;
                goto pre_struct_lock_err;
        }

        mode = args->flags & I915_EXEC_CONSTANTS_MASK;
        mask = I915_EXEC_CONSTANTS_MASK;
        switch (mode) {
        case I915_EXEC_CONSTANTS_REL_GENERAL:
        case I915_EXEC_CONSTANTS_ABSOLUTE:
        case I915_EXEC_CONSTANTS_REL_SURFACE:
                if (ring == &dev_priv->rings[RCS] &&
                    mode != dev_priv->relative_constants_mode) {
                        if (INTEL_INFO(dev)->gen < 4) {
                                ret = -EINVAL;
                                goto pre_struct_lock_err;
                        }

                        if (INTEL_INFO(dev)->gen > 5 &&
                            mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
                                ret = -EINVAL;
                                goto pre_struct_lock_err;
                        }

                        /* The HW changed the meaning of this bit on gen6 */
                        if (INTEL_INFO(dev)->gen >= 6)
                                mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
                }
                break;
        default:
                DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
                ret = -EINVAL;
                goto pre_struct_lock_err;
        }

        if (args->buffer_count < 1) {
                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
                ret = -EINVAL;
                goto pre_struct_lock_err;
        }

        if (args->num_cliprects != 0) {
                if (ring != &dev_priv->rings[RCS]) {
                        DRM_DEBUG("clip rectangles are only valid with the render ring\n");
                        ret = -EINVAL;
                        goto pre_struct_lock_err;
                }

                if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
                        DRM_DEBUG("execbuf with %u cliprects\n",
                                  args->num_cliprects);
                        ret = -EINVAL;
                        goto pre_struct_lock_err;
                }
                cliprects = kmalloc(sizeof(*cliprects) * args->num_cliprects,
                    DRM_I915_GEM, M_WAITOK | M_ZERO);
                ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
                    sizeof(*cliprects) * args->num_cliprects);
                if (ret != 0)
                        goto pre_struct_lock_err;
        }

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                goto pre_struct_lock_err;

        if (dev_priv->mm.suspended) {
                ret = -EBUSY;
                goto struct_lock_err;
        }

        eb = eb_create(args->buffer_count);
        if (eb == NULL) {
                ret = -ENOMEM;
                goto struct_lock_err;
        }

        /* Look up object handles */
        INIT_LIST_HEAD(&objects);
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_i915_gem_object *obj;

                obj = to_intel_bo(drm_gem_object_lookup(dev, file,
                    exec[i].handle));
                if (&obj->base == NULL) {
                        DRM_DEBUG("Invalid object handle %d at index %d\n",
                                  exec[i].handle, i);
                        /* prevent error path from reading uninitialized data */
                        ret = -ENOENT;
                        goto err;
                }

                if (!list_empty(&obj->exec_list)) {
                        DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
                                  obj, exec[i].handle, i);
                        ret = -EINVAL;
                        goto err;
                }

                list_add_tail(&obj->exec_list, &objects);
                obj->exec_handle = exec[i].handle;
                obj->exec_entry = &exec[i];
                eb_add_object(eb, obj);
        }

        /* take note of the batch buffer before we might reorder the lists */
        batch_obj = list_entry(objects.prev,
                               struct drm_i915_gem_object,
                               exec_list);

        /* Move the objects en-masse into the GTT, evicting if necessary. */
        ret = i915_gem_execbuffer_reserve(ring, file, &objects);
        if (ret)
                goto err;

        /* The objects are in their final locations, apply the relocations. */
        ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
        if (ret) {
                if (ret == -EFAULT) {
                        ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
                            &objects, eb, exec, args->buffer_count);
                        DRM_LOCK_ASSERT(dev);
                }
                if (ret)
                        goto err;
        }

        /* Set the pending read domains for the batch buffer to COMMAND */
        if (batch_obj->base.pending_write_domain) {
                DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
                ret = -EINVAL;
                goto err;
        }
        batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

        ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
        if (ret)
                goto err;

        seqno = i915_gem_next_request_seqno(ring);
        for (i = 0; i < I915_NUM_RINGS - 1; i++) {
                if (seqno < ring->sync_seqno[i]) {
                        /* The GPU can not handle its semaphore value wrapping,
                         * so every billion or so execbuffers, we need to stall
                         * the GPU in order to reset the counters.
                         */
                        ret = i915_gpu_idle(dev, true);
                        if (ret)
                                goto err;

                        KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
                }
        }

        if (ring == &dev_priv->rings[RCS] &&
            mode != dev_priv->relative_constants_mode) {
                ret = intel_ring_begin(ring, 4);
                if (ret)
                        goto err;

                intel_ring_emit(ring, MI_NOOP);
                intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
                intel_ring_emit(ring, INSTPM);
                intel_ring_emit(ring, mask << 16 | mode);
                intel_ring_advance(ring);

                dev_priv->relative_constants_mode = mode;
        }

        if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
                ret = i915_reset_gen7_sol_offsets(dev, ring);
                if (ret)
                        goto err;
        }

        exec_start = batch_obj->gtt_offset + args->batch_start_offset;
        exec_len = args->batch_len;

        if (i915_fix_mi_batchbuffer_end) {
                i915_gem_fix_mi_batchbuffer_end(batch_obj,
                    args->batch_start_offset, args->batch_len);
        }

        if (cliprects) {
                for (i = 0; i < args->num_cliprects; i++) {
                        ret = i915_emit_box_p(dev, &cliprects[i],
                            args->DR1, args->DR4);
                        if (ret)
                                goto err;

                        ret = ring->dispatch_execbuffer(ring, exec_start,
                            exec_len);
                        if (ret)
                                goto err;
                }
        } else {
                ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
                if (ret)
                        goto err;
        }

        i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
        i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
        eb_destroy(eb);
        while (!list_empty(&objects)) {
                struct drm_i915_gem_object *obj;

                obj = list_first_entry(&objects, struct drm_i915_gem_object,
                    exec_list);
                list_del_init(&obj->exec_list);
                drm_gem_object_unreference(&obj->base);
        }
struct_lock_err:
        DRM_UNLOCK(dev);

pre_struct_lock_err:
        for (i = 0; i < args->buffer_count; i++) {
                if (relocs_ma[i] != NULL) {
                        vm_page_unhold_pages(relocs_ma[i], howmany(
                            exec[i].relocation_count *
                            sizeof(struct drm_i915_gem_relocation_entry),
                            PAGE_SIZE));
                        drm_free(relocs_ma[i], DRM_I915_GEM);
                }
        }
        drm_free(relocs_ma, DRM_I915_GEM);
        drm_free(cliprects, DRM_I915_GEM);
        return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_execbuffer *args = data;
        struct drm_i915_gem_execbuffer2 exec2;
        struct drm_i915_gem_exec_object *exec_list = NULL;
        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
        int ret, i;

        DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
            (int) args->buffers_ptr, args->buffer_count, args->batch_len);

        if (args->buffer_count < 1) {
                DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
                return -EINVAL;
        }

        /* Copy in the exec list from userland */
        /* XXXKIB user-controlled malloc size */
        exec_list = kmalloc(sizeof(*exec_list) * args->buffer_count,
            DRM_I915_GEM, M_WAITOK);
        exec2_list = kmalloc(sizeof(*exec2_list) * args->buffer_count,
            DRM_I915_GEM, M_WAITOK);
        ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
            sizeof(*exec_list) * args->buffer_count);
        if (ret != 0) {
                DRM_DEBUG("copy %d exec entries failed %d\n",
                          args->buffer_count, ret);
                drm_free(exec_list, DRM_I915_GEM);
                drm_free(exec2_list, DRM_I915_GEM);
                return (ret);
        }

        for (i = 0; i < args->buffer_count; i++) {
                exec2_list[i].handle = exec_list[i].handle;
                exec2_list[i].relocation_count = exec_list[i].relocation_count;
                exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
                exec2_list[i].alignment = exec_list[i].alignment;
                exec2_list[i].offset = exec_list[i].offset;
                if (INTEL_INFO(dev)->gen < 4)
                        exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
                else
                        exec2_list[i].flags = 0;
        }

        exec2.buffers_ptr = args->buffers_ptr;
        exec2.buffer_count = args->buffer_count;
        exec2.batch_start_offset = args->batch_start_offset;
        exec2.batch_len = args->batch_len;
        exec2.DR1 = args->DR1;
        exec2.DR4 = args->DR4;
        exec2.num_cliprects = args->num_cliprects;
        exec2.cliprects_ptr = args->cliprects_ptr;
        exec2.flags = I915_EXEC_RENDER;

        ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
        if (!ret) {
                /* Copy the new buffer offsets back to the user's exec list. */
                for (i = 0; i < args->buffer_count; i++)
                        exec_list[i].offset = exec2_list[i].offset;
                /* ... and back out to userspace */
                ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
                    sizeof(*exec_list) * args->buffer_count);
                if (ret != 0) {
                        DRM_DEBUG("failed to copy %d exec entries "
                                  "back to user (%d)\n",
                                  args->buffer_count, ret);
                }
        }

        drm_free(exec_list, DRM_I915_GEM);
        drm_free(exec2_list, DRM_I915_GEM);
        return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
                     struct drm_file *file)
{
        struct drm_i915_gem_execbuffer2 *args = data;
        struct drm_i915_gem_exec_object2 *exec2_list = NULL;
        int ret;

        DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
            (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

        if (args->buffer_count < 1 ||
            args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
                DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
                return -EINVAL;
        }

        /* XXXKIB user-controllable kmalloc size */
        exec2_list = kmalloc(sizeof(*exec2_list) * args->buffer_count,
            DRM_I915_GEM, M_WAITOK);
        ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
            sizeof(*exec2_list) * args->buffer_count);
        if (ret != 0) {
                DRM_DEBUG("copy %d exec entries failed %d\n",
                          args->buffer_count, ret);
                drm_free(exec2_list, DRM_I915_GEM);
                return (ret);
        }

        ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
        if (!ret) {
                /* Copy the new buffer offsets back to the user's exec list. */
                ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
                    sizeof(*exec2_list) * args->buffer_count);
                if (ret) {
                        DRM_DEBUG("failed to copy %d exec entries "
                                  "back to user (%d)\n",
                                  args->buffer_count, ret);
                }
        }

        drm_free(exec2_list, DRM_I915_GEM);
        return ret;
}
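
/*
 * For reference only: a minimal userspace-side sketch of driving the
 * execbuffer2 ioctl handled above.  This is not part of the driver and is
 * deliberately simplified; the drm fd, the GEM handle and the batch contents
 * (ending in MI_BATCH_BUFFER_END) are assumed to have been set up elsewhere,
 * the hypothetical submit_batch() helper submits a single object that is both
 * the batch and the last entry in the buffer list, and batch_len is assumed
 * to be non-zero and 8-byte aligned as required by i915_gem_check_execbuffer().
 *
 *      #include <sys/types.h>
 *      #include <sys/ioctl.h>
 *      #include <stdint.h>
 *      #include <string.h>
 *      #include "i915_drm.h"
 *
 *      static int
 *      submit_batch(int fd, uint32_t batch_handle, uint32_t batch_len)
 *      {
 *              struct drm_i915_gem_exec_object2 obj;
 *              struct drm_i915_gem_execbuffer2 execbuf;
 *
 *              memset(&obj, 0, sizeof(obj));
 *              obj.handle = batch_handle;
 *
 *              memset(&execbuf, 0, sizeof(execbuf));
 *              execbuf.buffers_ptr = (uintptr_t)&obj;
 *              execbuf.buffer_count = 1;
 *              execbuf.batch_start_offset = 0;
 *              execbuf.batch_len = batch_len;
 *              execbuf.flags = I915_EXEC_RENDER;
 *
 *              return (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf));
 *      }
 *
 * On return, the kernel copies the final GTT offsets back into the exec
 * object list, which userspace can cache as presumed_offset values for
 * future relocations.
 */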