/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_execbuffer.c,v 1.3 2012/05/28 13:58:08 kib Exp $
 */

#include <sys/limits.h>
#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include "i915_drm.h"
#include "i915_drv.h"
#include "intel_drv.h"

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_load_acq_int(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(obj->ring);
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= intel_ring_flag(ring);
}

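/*
 * Per-execbuffer handle->object lookup table.  Objects referenced by the
 * exec list are added once under the handle they were passed in with, so
 * that relocation processing can resolve target_handle values without
 * taking extra references (the execbuffer path already holds one for every
 * object on the list).  The table is a simple hashinit() chain table sized
 * from the buffer count; eb_reset() lets the slow relocation path rebuild
 * it after dropping and reacquiring the lock.
 */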
struct eb_objects {
	u_long hashmask;
	LIST_HEAD(, drm_i915_gem_object) *buckets;
};

static struct eb_objects *
eb_create(int size)
{
	struct eb_objects *eb;

	eb = kmalloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO);
	eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask);
	return (eb);
}

static void
eb_reset(struct eb_objects *eb)
{
	int i;

	for (i = 0; i <= eb->hashmask; i++)
		LIST_INIT(&eb->buckets[i]);
}

static void
eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
{

	LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask],
	    obj, exec_node);
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	struct drm_i915_gem_object *obj;

	LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) {
		if (obj->exec_handle == handle)
			return (obj);
	}
	return (NULL);
}

static void
eb_destroy(struct eb_objects *eb)
{

	drm_free(eb->buckets, DRM_I915_GEM);
	drm_free(eb, DRM_I915_GEM);
}

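/*
 * Apply a single relocation: validate the request, then patch the 32-bit
 * location at reloc->offset inside @obj so that it holds the target
 * object's current GTT address plus reloc->delta.  If userspace guessed
 * correctly (reloc->presumed_offset already matches the target's
 * gtt_offset) nothing has to be written at all, which is the common cheap
 * case.  The write itself goes either through an sf_buf mapping of the
 * backing page (object still in the CPU domain) or through a
 * write-combining mapping of the aperture.
 */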
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_offset = to_intel_bo(target_obj)->gtt_offset;

#if WATCH_RELOC
	DRM_INFO("%s: obj %p offset %08x target %d "
		 "read %08x write %08x gtt %08x "
		 "presumed %08x delta %08x\n",
		 __func__,
		 obj,
		 (int) reloc->offset,
		 (int) reloc->target_handle,
		 (int) reloc->read_domains,
		 (int) reloc->write_domain,
		 (int) target_offset,
		 (int) reloc->presumed_offset,
		 reloc->delta);
#endif

	/* The target buffer should have appeared before us in the
	 * exec_object list, so it should have a GTT space bound by now.
	 */
	if (unlikely(target_offset == 0)) {
		DRM_DEBUG("No GTT space found for object %d\n",
		    reloc->target_handle);
		return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
	    & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
		    "obj %p target %d offset %d "
		    "read %08x write %08x",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->read_domains,
		    reloc->write_domain);
		return ret;
	}
	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
	    reloc->write_domain != target_obj->pending_write_domain)) {
		DRM_DEBUG("Write domain conflict: "
		    "obj %p target %d offset %d "
		    "new %08x old %08x\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    reloc->write_domain,
		    target_obj->pending_write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
		    "obj %p target %d offset %d size %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset,
		    (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
		    "obj %p target %d offset %d.\n",
		    obj, reloc->target_handle,
		    (int) reloc->offset);
		return ret;
	}

	reloc->delta += target_offset;
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;
		struct sf_buf *sf;

		sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)]);
		if (sf == NULL)
			return (-ENOMEM);
		vaddr = (void *)sf_buf_kva(sf);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		sf_buf_free(sf);
	} else {
		uint32_t *reloc_entry;
		char *reloc_page;

		/* We can't wait for rendering with pagefaults disabled */
		if (obj->active && (curthread->td_flags & TDF_NOFAULT))
			return (-EFAULT);
		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			return ret;

		/*
		 * Map the page containing the relocation we're going
		 * to perform.
		 */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		*(volatile uint32_t *)reloc_entry = reloc->delta;
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

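/*
 * Fast relocation pass: walk the object's relocation list directly from
 * the userspace array.  The copies use copyin_nofault()/copyout_nofault()
 * because this runs with pagefaults disabled (see
 * i915_gem_execbuffer_relocate() below); if the user memory is not
 * resident the copy fails with -EFAULT and the caller falls back to
 * i915_gem_execbuffer_relocate_slow().
 */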
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
	struct drm_i915_gem_relocation_entry *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	struct drm_i915_gem_relocation_entry reloc;
	int i, ret;

	user_relocs = (void *)(uintptr_t)entry->relocs_ptr;
	for (i = 0; i < entry->relocation_count; i++) {
		ret = -copyin_nofault(user_relocs + i, &reloc, sizeof(reloc));
		if (ret != 0)
			return (ret);

		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
		if (ret != 0)
			return (ret);

		ret = -copyout_nofault(&reloc.presumed_offset,
		    &user_relocs[i].presumed_offset,
		    sizeof(reloc.presumed_offset));
		if (ret != 0)
			return (ret);
	}

	return (0);
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
    struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb,
			     struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	thread_t td = curthread;
	int ret;
	int pflags;

	/* Try to move as many of the relocation targets off the active list
	 * to avoid unnecessary fallbacks to the slow path, as we cannot wait
	 * for the retirement with pagefaults disabled.
	 */
	i915_gem_retire_requests(dev);

	ret = 0;
	pflags = td->td_flags & TDF_NOFAULT;
	atomic_set_int(&td->td_flags, TDF_NOFAULT);

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the device lock lest the user pass in the relocations
	 * contained within an mmaped bo. For in such a case, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the device lock again. Obviously this is bad.
	 */

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret != 0)
			break;
	}

	if ((pflags & TDF_NOFAULT) == 0)
		atomic_clear_int(&td->td_flags, TDF_NOFAULT);

	return (ret);
}

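/*
 * Pin one object into the GTT for execution and, on pre-gen4 hardware,
 * grab a fence register if the entry asked for one and the object is
 * tiled.  A fence taken here is recorded with the kernel-internal
 * __EXEC_OBJECT_HAS_FENCE flag stashed in the high bit of entry->flags so
 * that the reservation loop can drop it again on the unwind path.
 */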
#define __EXEC_OBJECT_HAS_FENCE (1<<31)

static int
pin_and_fence_object(struct drm_i915_gem_object *obj,
		     struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable =
		entry->relocation_count ? true : need_fence;

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
	if (ret)
		return ret;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			if (obj->tiling_mode) {
				ret = i915_gem_object_get_fence(obj, ring);
				if (ret)
					goto err_unpin;

				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
				i915_gem_object_pin_fence(obj);
			} else {
				ret = i915_gem_object_put_fence(obj);
				if (ret)
					goto err_unpin;
			}
			obj->pending_fenced_gpu_access = true;
		}
	}

	entry->offset = obj->gtt_offset;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}

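/*
 * Reserve GTT space for every object on the exec list.  The list is first
 * reordered so that objects which must end up in the mappable aperture
 * (anything with relocations to process, or needing a fence) are bound
 * ahead of the rest; binding the pickier objects first makes it less
 * likely that a later bind fails and forces the eviction/retry loop
 * described in the three-phase comment below.
 */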
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects)
{
	drm_i915_private_t *dev_priv;
	struct drm_i915_gem_object *obj;
	int ret, retry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	struct list_head ordered_objects;

	dev_priv = ring->dev->dev_private;
	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable =
			entry->relocation_count ? true : need_fence;

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects and obtain
	 *     a fence register if required.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable =
				entry->relocation_count ? true : need_fence;

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = pin_and_fence_object(obj, ring);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = pin_and_fence_object(obj, ring);
			if (ret) {
				int ret_ignore;

				/* This can potentially raise a harmless
				 * -EINVAL if we failed to bind in the above
				 * call. It cannot raise -EINTR since we know
				 * that the bo is freshly bound and so will
				 * not need to be flushed or waited upon.
				 */
				ret_ignore = i915_gem_object_unbind(obj);
				(void)ret_ignore;
				if (obj->gtt_space != NULL)
					kprintf("%s: gtt_space\n", __func__);
				break;
			}
		}

		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry;

			if (!obj->gtt_space)
				continue;

			entry = obj->exec_entry;
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				i915_gem_object_unpin_fence(obj);
				entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
			}

			i915_gem_object_unpin(obj);

			/* ... and ensure ppgtt mapping exists if needed. */
			if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
				i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
						       obj, obj->cache_level);

				obj->has_aliasing_ppgtt_mapping = 1;
			}
		}

		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt, just clear anything that is purgeable.
		 * Second attempt, clear the entire GTT.
		 */
		ret = i915_gem_evict_everything(ring->dev, retry == 0);
		if (ret)
			return ret;

		retry++;
	} while (1);

err:
	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
		struct drm_i915_gem_exec_object2 *entry;

		if (!obj->gtt_space)
			continue;

		entry = obj->exec_entry;
		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
			i915_gem_object_unpin_fence(obj);
			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
		}

		i915_gem_object_unpin(obj);
	}

	return ret;
}

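/*
 * Slow relocation path, used when the nofault copies above hit -EFAULT.
 * The struct lock is dropped, every relocation list is copied in with
 * pagefaults allowed, and then the objects are looked up, reserved and
 * relocated again from the kernel copy.  The objects are unreferenced and
 * dropped from the exec list first, since another execbuffer may run while
 * the lock is dropped (see the comment below).
 */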
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
    struct drm_file *file, struct intel_ring_buffer *ring,
    struct list_head *objects, struct eb_objects *eb,
    struct drm_i915_gem_exec_object2 *exec, int count)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	int *reloc_offset;
	int i, total, ret;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(objects)) {
		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	DRM_UNLOCK(dev);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = kmalloc(count * sizeof(*reloc_offset), DRM_I915_GEM,
	    M_WAITOK | M_ZERO);
	reloc = kmalloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO);

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry *user_relocs;

		user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr;
		ret = -copyin(user_relocs, reloc + total,
		    exec[i].relocation_count * sizeof(*reloc));
		if (ret != 0) {
			DRM_LOCK(dev);
			goto err;
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		DRM_LOCK(dev);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		list_add_tail(&obj->exec_list, objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	ret = i915_gem_execbuffer_reserve(ring, file, objects);
	if (ret)
		goto err;

	list_for_each_entry(obj, objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
		    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are: this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free(reloc, DRM_I915_GEM);
	drm_free(reloc_offset, DRM_I915_GEM);
	return ret;
}

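/*
 * Emit whatever flushing the accumulated domain transitions require:
 * a chipset flush when CPU-domain writes must become visible to the GPU,
 * a store fence for GTT-domain writes, and a ring flush on every ring
 * that has GPU domains to invalidate or flush.
 */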
static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		cpu_sfence();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->rings[i],
				    invalidate_domains, flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static bool
intel_enable_semaphores(struct drm_device *dev)
{
	if (INTEL_INFO(dev)->gen < 6)
		return 0;

	if (i915_semaphores >= 0)
		return i915_semaphores;

	/* Enable semaphores on SNB when IO remapping is off */
	if (INTEL_INFO(dev)->gen == 6)
		return !intel_iommu_enabled;

	return 1;
}

static int
i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *to)
{
	struct intel_ring_buffer *from = obj->ring;
	u32 seqno;
	int ret, idx;

	if (from == NULL || to == from)
		return 0;

	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
	if (!intel_enable_semaphores(obj->base.dev))
		return i915_gem_object_wait_rendering(obj);

	idx = intel_ring_sync_index(from, to);

	seqno = obj->last_rendering_seqno;
	if (seqno <= from->sync_seqno[idx])
		return 0;

	if (seqno == from->outstanding_lazy_request) {
		struct drm_i915_gem_request *request;

		request = kmalloc(sizeof(*request), DRM_I915_GEM,
		    M_WAITOK | M_ZERO);
		ret = i915_add_request(from, NULL, request);
		if (ret) {
			drm_free(request, DRM_I915_GEM);
			return ret;
		}

		seqno = request->seqno;
	}

	from->sync_seqno[idx] = seqno;

	return to->sync_to(to, from, seqno - 1);
}

static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
	u32 plane, flip_mask;
	int ret;

	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */

	for (plane = 0; flips >> plane; plane++) {
		if (((flips >> plane) & 1) == 0)
			continue;

		if (plane)
			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
		else
			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			 __func__,
			 cd.invalidate_domains,
			 cd.flush_domains);
#endif
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

	if (cd.flips) {
		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
		if (ret)
			return ret;
	}

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_execbuffer_sync_rings(obj, ring);
		if (ret)
			return ret;
	}

	return 0;
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

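/*
 * Wire down the userspace relocation lists.  Each entry's relocs_ptr range
 * is faulted in and held with vm_fault_quick_hold_pages(), and the held
 * pages are remembered in *map so i915_gem_do_execbuffer() can drop them
 * again on exit.  Keeping the lists resident appears to be what lets the
 * fast relocation pass copy them with pagefaults disabled.
 */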
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count,
    vm_page_t ***map)
{
	vm_page_t *ma;
	int i, length, page_count;

	/* XXXKIB various limits checking is missing there */
	*map = kmalloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO);
	for (i = 0; i < count; i++) {
		/* First check for malicious input causing overflow */
		if (exec[i].relocation_count >
		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
			return -EINVAL;

		length = exec[i].relocation_count *
		    sizeof(struct drm_i915_gem_relocation_entry);
		if (length == 0) {
			(*map)[i] = NULL;
			continue;
		}
		/*
		 * Since both start and end of the relocation region
		 * may not be aligned on a page boundary, be
		 * conservative and request a page slot for each
		 * partial page. Thus +2.
		 */
		page_count = howmany(length, PAGE_SIZE) + 2;
		ma = (*map)[i] = kmalloc(page_count * sizeof(vm_page_t),
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
		    exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE,
		    ma, page_count) == -1) {
			drm_free(ma, DRM_I915_GEM);
			(*map)[i] = NULL;
			return (-EFAULT);
		}
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;
	uint32_t old_read, old_write;

	list_for_each_entry(obj, objects, exec_list) {
		old_read = obj->base.read_domains;
		old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
			intel_mark_busy(ring->dev, obj);
		}
	}
}

int i915_gem_sync_exec_requests;

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = kmalloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO);
	if (request == NULL || i915_add_request(ring, file, request)) {
		i915_gem_next_request_seqno(ring);
		drm_free(request, DRM_I915_GEM);
	} else if (i915_gem_sync_exec_requests)
		i915_wait_request(ring, request->seqno, true);
}

static void
i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj,
    uint32_t batch_start_offset, uint32_t batch_len)
{
	char *mkva;
	uint64_t po_r, po_w;
	uint32_t cmd;

	po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset +
	    batch_start_offset + batch_len;
	if (batch_len > 0)
		po_r -= 4;
	mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE,
	    PAT_WRITE_COMBINING);
	po_r &= PAGE_MASK;
	cmd = *(uint32_t *)(mkva + po_r);

	if (cmd != MI_BATCH_BUFFER_END) {
		/*
		 * batch_len != 0 due to the check at the start of
		 * i915_gem_do_execbuffer
		 */
		if (batch_obj->base.size > batch_start_offset + batch_len) {
			po_w = po_r + 4;
			/* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */
		} else {
			po_w = po_r;
			DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n");
		}
		*(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END;
	}

	pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE);
}

int i915_fix_mi_batchbuffer_end = 0;

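/*
 * On gen7 render, reset the four streamout (SOL) write offset registers
 * to zero with MI_LOAD_REGISTER_IMM.  Only used when userspace passes the
 * I915_EXEC_GEN7_SOL_RESET flag; on other rings or generations this is a
 * no-op.
 */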
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

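/*
 * Core of both execbuffer ioctls.  The flow is: validate the arguments and
 * wire down the relocation lists, pick the target ring, look up and
 * reserve every object (the last entry in the exec list is the batch),
 * apply relocations (falling back to the slow path on -EFAULT), flush and
 * invalidate the relevant domains, then dispatch the batch and queue a
 * request so the objects can be retired later.
 */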
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct list_head objects;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	vm_page_t **relocs_ma;
	u32 exec_start, exec_len;
	u32 seqno;
	u32 mask;
	int ret, mode, i;

	if (!i915_gem_check_execbuffer(args)) {
		DRM_DEBUG("execbuf with invalid offset/length\n");
		return -EINVAL;
	}

	if (args->batch_len == 0)
		return (0);

	ret = validate_exec_list(exec, args->buffer_count, &relocs_ma);
	if (ret != 0)
		goto pre_struct_lock_err;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->rings[RCS];
		break;
	case I915_EXEC_BSD:
		if (!HAS_BSD(dev)) {
			DRM_DEBUG("execbuf with invalid ring (BSD)\n");
			return -EINVAL;
		}
		ring = &dev_priv->rings[VCS];
		break;
	case I915_EXEC_BLT:
		if (!HAS_BLT(dev)) {
			DRM_DEBUG("execbuf with invalid ring (BLT)\n");
			return -EINVAL;
		}
		ring = &dev_priv->rings[BCS];
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
		    (int)(args->flags & I915_EXEC_RING_MASK));
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->rings[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				ret = -EINVAL;
				goto pre_struct_lock_err;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		ret = -EINVAL;
		goto pre_struct_lock_err;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->rings[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
			    args->num_cliprects);
			ret = -EINVAL;
			goto pre_struct_lock_err;
		}
		cliprects = kmalloc(sizeof(*cliprects) * args->num_cliprects,
		    DRM_I915_GEM, M_WAITOK | M_ZERO);
		ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects,
		    sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0)
			goto pre_struct_lock_err;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_struct_lock_err;

	if (dev_priv->mm.suspended) {
		ret = -EBUSY;
		goto struct_lock_err;
	}

	eb = eb_create(args->buffer_count);
	if (eb == NULL) {
		ret = -ENOMEM;
		goto struct_lock_err;
	}

	/* Look up object handles */
	INIT_LIST_HEAD(&objects);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;
		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
		    exec[i].handle));
		if (&obj->base == NULL) {
			DRM_DEBUG("Invalid object handle %d at index %d\n",
			    exec[i].handle, i);
			/* prevent error path from reading uninitialized data */
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->exec_list)) {
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
			    obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		list_add_tail(&obj->exec_list, &objects);
		obj->exec_handle = exec[i].handle;
		obj->exec_entry = &exec[i];
		eb_add_object(eb, obj);
	}

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
			    &objects, eb, exec, args->buffer_count);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
	for (i = 0; i < I915_NUM_RINGS - 1; i++) {
		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
			ret = i915_gpu_idle(dev, true);
			if (ret)
				goto err;

			KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno"));
		}
	}

	if (ring == &dev_priv->rings[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;

	if (i915_fix_mi_batchbuffer_end) {
		i915_gem_fix_mi_batchbuffer_end(batch_obj,
		    args->batch_start_offset, args->batch_len);
	}

	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box_p(dev, &cliprects[i],
			    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring, exec_start,
			    exec_len);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
		if (ret)
			goto err;
	}

	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);
	while (!list_empty(&objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&objects, struct drm_i915_gem_object,
		    exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
struct_lock_err:
	DRM_UNLOCK(dev);

pre_struct_lock_err:
	for (i = 0; i < args->buffer_count; i++) {
		if (relocs_ma[i] != NULL) {
			vm_page_unhold_pages(relocs_ma[i], howmany(
			    exec[i].relocation_count *
			    sizeof(struct drm_i915_gem_relocation_entry),
			    PAGE_SIZE));
			drm_free(relocs_ma[i], DRM_I915_GEM);
		}
	}
	drm_free(relocs_ma, DRM_I915_GEM);
	drm_free(cliprects, DRM_I915_GEM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n",
	    (int) args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	/* XXXKIB user-controlled malloc size */
	exec_list = kmalloc(sizeof(*exec_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	exec2_list = kmalloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list,
	    sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		drm_free(exec_list, DRM_I915_GEM);
		drm_free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec_list) * args->buffer_count);
		if (ret != 0) {
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	drm_free(exec_list, DRM_I915_GEM);
	drm_free(exec2_list, DRM_I915_GEM);
	return ret;
}

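/*
 * execbuffer2 entry point: copy the exec object list in from userspace,
 * run i915_gem_do_execbuffer(), and copy the final GTT offsets back so
 * userspace can use them as presumed_offset hints on the next submission.
 *
 * Roughly, a caller is expected to fill the request like this (sketch
 * only; field names are the ones defined in i915_drm.h):
 *
 *	struct drm_i915_gem_exec_object2 objs[N];   // last entry = batch
 *	struct drm_i915_gem_execbuffer2 eb = {
 *		.buffers_ptr = (uintptr_t)objs,
 *		.buffer_count = N,
 *		.batch_start_offset = 0,
 *		.batch_len = batch_bytes,
 *		.flags = I915_EXEC_RENDER,
 *	};
 */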
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n",
	    (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len);

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* XXXKIB user-controllable kmalloc size */
	exec2_list = kmalloc(sizeof(*exec2_list) * args->buffer_count,
	    DRM_I915_GEM, M_WAITOK);
	ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list,
	    sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
		    args->buffer_count, ret);
		drm_free(exec2_list, DRM_I915_GEM);
		return (ret);
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr,
		    sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			DRM_DEBUG("failed to copy %d exec entries "
			    "back to user (%d)\n",
			    args->buffer_count, ret);
		}
	}

	drm_free(exec2_list, DRM_I915_GEM);
	return ret;
}