/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/highmem.h>

struct eb_objects {
	struct list_head objects;
	int and;
	union {
		struct drm_i915_gem_object *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_objects *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_objects *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		int size = args->buffer_count;
		size *= sizeof(struct drm_i915_gem_object *);
		size += sizeof(struct eb_objects);
		eb = kmalloc(size, M_DRM, M_WAITOK);
	}

	if (eb == NULL) {
		int size = args->buffer_count;
		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kmalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_objects),
			     M_DRM, M_WAITOK | M_ZERO);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->objects);
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static int
eb_lookup_objects(struct eb_objects *eb,
		  struct drm_i915_gem_exec_object2 *exec,
		  const struct drm_i915_gem_execbuffer2 *args,
		  struct drm_file *file)
{
	int i;

	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			return -ENOENT;
		}

		if (!list_empty(&obj->exec_list)) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			return -EINVAL;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->exec_list, &eb->objects);

		obj->exec_entry = &exec[i];
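		/*
		 * Record the object for eb_get_object(): a negative ->and
		 * means the direct lookup table is in use (indexed by buffer
		 * position), otherwise the object is hashed into the bucket
		 * array by handle (or by position when I915_EXEC_HANDLE_LUT
		 * was requested).
		 */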
		if (eb->and < 0) {
			eb->lut[i] = obj;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			obj->exec_handle = handle;
			hlist_add_head(&obj->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
	}
	lockmgr(&file->table_lock, LK_RELEASE);

	return 0;
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct drm_i915_gem_object *obj;

			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
			if (obj->exec_handle == handle)
				return obj;
		}
		return NULL;
	}
}

static void
eb_destroy(struct eb_objects *eb)
{
	while (!list_empty(&eb->objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	drm_free(eb, M_DRM);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;
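
	/* The value written into the batch is reloc->delta (the offset the
	 * batch expects within the target object) plus the target's current
	 * GTT offset; presumed_offset is userspace's guess of that offset.
	 */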
	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_flags & TDF_NOFAULT))
		return -EFAULT;

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
							     reloc->offset >> PAGE_SHIFT));
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		kunmap_atomic(vaddr);
	} else {
		uint32_t __iomem *reloc_entry;
		char __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform. */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (copyin_nofault(user_relocs, r, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}
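
/*
 * Slow-path variant of i915_gem_execbuffer_relocate_object(): the relocation
 * entries have already been copied into kernel memory by the caller
 * (i915_gem_execbuffer_relocate_slow()), so no user access is needed here.
 */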
static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb)
{
	struct drm_i915_gem_object *obj;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to acquire
	 * the struct mutex again. Obviously this is bad and so lockdep
	 * complains vehemently.
	 */
#if 0
	pagefault_disable();
#endif
	list_for_each_entry(obj, &eb->objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
#if 0
	pagefault_enable();
#endif

	return ret;
}

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
				   struct intel_ring_buffer *ring,
				   bool *need_reloc)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	if (entry->offset != obj->gtt_offset) {
		entry->offset = obj->gtt_offset;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}
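
/*
 * Reserve GTT space for every object on the execbuffer list. The list is
 * reordered so that objects needing a fence or a mappable slot are bound
 * first; on -ENOSPC everything is evicted once and the whole pass is
 * retried before giving up.
 */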
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
			return ret;

		ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}
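
/*
 * Slow path for applying relocations, used when the fast path faulted on a
 * user pointer: drop struct_mutex, copy every relocation entry into kernel
 * memory, retake the lock, re-lookup and re-reserve the objects, and then
 * apply the relocations from the kernel copy.
 */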
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	int count = args->buffer_count;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->objects)) {
		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(obj, &eb->objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}
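
/*
 * Prepare the objects for GPU access: synchronize with outstanding rendering
 * on other rings, clflush anything still in the CPU write domain, issue a
 * store fence for GTT writes, and invalidate the GPU caches before the batch
 * runs.
 */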
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		cpu_sfence();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
#if 0
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
#endif
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
#if 0
		/* we may also need to update the presumed offsets */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (fault_in_multipages_readable(ptr, length))
			return -EFAULT;
#endif
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}
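
/*
 * Queue the request that acts as the breadcrumb for this batch; marking the
 * GPU caches dirty forces the request emission to perform a full flush.
 */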
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
		return ret;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
				    M_DRM, M_WAITOK);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   (struct drm_clip_rect __user *)(uintptr_t)
				   args->cliprects_ptr,
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(eb->objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(dev, eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now.
	 */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	drm_free(cliprects, M_DRM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     (void __user *)(uintptr_t)args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}
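
/*
 * ioctl entry point for execbuffer2: copy the exec object list in from
 * userspace, run the common submission path, and write the updated buffer
 * offsets back to the user's list.
 */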
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     M_DRM, M_WAITOK);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}