/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/highmem.h>

struct eb_objects {
	struct list_head objects;
	int and;
	union {
		struct drm_i915_gem_object *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_objects *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_objects *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		int size = args->buffer_count;
		size *= sizeof(struct drm_i915_gem_object *);
		size += sizeof(struct eb_objects);
		eb = kmalloc(size, M_DRM, M_WAITOK);
	}

	if (eb == NULL) {
		int size = args->buffer_count;
		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kmalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_objects),
			     M_DRM, M_WAITOK | M_ZERO);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->objects);
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
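/*
 * Handle lookup uses one of two schemes, chosen in eb_create(): with
 * I915_EXEC_HANDLE_LUT the execbuffer indices themselves are the keys and
 * eb->lut[] is a direct table (eb->and is negative and -eb->and is the
 * table size); otherwise handles are hashed into a power-of-two bucket
 * array and eb->and is the bucket mask.  As an illustration (assuming
 * 4 KiB pages and an 8-byte struct hlist_head, i.e. a 64-bit build), a
 * buffer_count of 10 starts from 4096/8/2 = 256 buckets and is halved
 * down to 16, giving eb->and = 15.
 */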
static int
eb_lookup_objects(struct eb_objects *eb,
		  struct drm_i915_gem_exec_object2 *exec,
		  const struct drm_i915_gem_execbuffer2 *args,
		  struct drm_file *file)
{
	int i;

	lockmgr(&file->table_lock, LK_EXCLUSIVE);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			return -ENOENT;
		}

		if (!list_empty(&obj->exec_list)) {
			lockmgr(&file->table_lock, LK_RELEASE);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			return -EINVAL;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->exec_list, &eb->objects);

		obj->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = obj;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			obj->exec_handle = handle;
			hlist_add_head(&obj->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
	}
	lockmgr(&file->table_lock, LK_RELEASE);

	return 0;
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct drm_i915_gem_object *obj;

			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
			if (obj->exec_handle == handle)
				return obj;
		}
		return NULL;
	}
}

static void
eb_destroy(struct eb_objects *eb)
{
	while (!list_empty(&eb->objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	drm_free(eb, M_DRM);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}
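/*
 * A relocation patches one dword inside the source object so that it points
 * at the target's current GTT address; conceptually
 *
 *	dword at reloc->offset in obj = target->gtt_offset + reloc->delta
 *
 * If the target still sits at reloc->presumed_offset the entry is already
 * correct and nothing is written.  The write goes through a CPU kmap when
 * that is cheap (see use_cpu_reloc()), otherwise through a write-combining
 * mapping of the GTT aperture.
 */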
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && (curthread->td_flags & TDF_NOFAULT))
		return -EFAULT;

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & PAGE_MASK;
		char *vaddr;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
							     reloc->offset >> PAGE_SHIFT));
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		kunmap_atomic(vaddr);
	} else {
		uint32_t __iomem *reloc_entry;
		char __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform. */
		reloc->offset += obj->gtt_offset;
		reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset &
		    ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING);
		reloc_entry = (uint32_t *)(reloc_page + (reloc->offset &
		    PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (copyin_nofault(user_relocs, r, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    copyout_nofault(&r->presumed_offset,
					    &user_relocs->presumed_offset,
					    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_objects *eb)
{
	struct drm_i915_gem_object *obj;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. In such a case the page fault
	 * handler would call i915_gem_fault() and we would try to acquire
	 * the struct mutex again. Obviously this is bad and so lockdep
	 * complains vehemently.
	 */
#if 0
	pagefault_disable();
#endif
	list_for_each_entry(obj, &eb->objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
#if 0
	pagefault_enable();
#endif

	return ret;
}

#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
				   struct intel_ring_buffer *ring,
				   bool *need_reloc)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
				return ret;

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	if (entry->offset != obj->gtt_offset) {
		entry->offset = obj->gtt_offset;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}
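/*
 * Reservation first reorders the object list so that buffers which must end
 * up in the mappable aperture (fence users and GTT-relocated objects) are
 * pinned before everything else, then runs the pin/bind loop below.  On
 * -ENOSPC the whole GTT is evicted once and the loop retried before the
 * error is reported to the caller.
 */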
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct list_head *objects,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
			return ret;

		ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}
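/*
 * Slow-path relocation: taken when the fast path returns -EFAULT, either
 * because a nofault user copy faulted or because a relocation had to wait
 * for rendering with page faults disabled.  The struct mutex is dropped so
 * the relocation arrays can be copied in with faulting allowed, every user
 * presumed_offset is poisoned so a later execbuffer cannot trust stale
 * values, and the objects are then looked up, reserved and relocated again
 * from the kernel-side copy.
 */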
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	int count = args->buffer_count;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->objects)) {
		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(obj, &eb->objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}
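/*
 * Before dispatch every object is synchronised against work outstanding on
 * other rings, CPU-domain writes are clflushed and made globally visible
 * with a chipset flush or store fence, and the ring's caches are invalidated
 * so the GPU cannot sample stale data left over from a previous batch.
 */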
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		cpu_sfence();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
#if 0
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
#endif
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
#if 0
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (fault_in_multipages_readable(ptr, length))
			return -EFAULT;
#endif
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}
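/*
 * Main execbuffer path.  In outline: validate the flags and exec list, look
 * up the objects (eb_lookup_objects), pin them into the GTT
 * (i915_gem_execbuffer_reserve), apply relocations (falling back to the
 * slow path on -EFAULT), then flush/invalidate caches, switch context, emit
 * any constants-mode change, dispatch the batch and finally move the
 * objects onto the active list.
 */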
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
		return ret;

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
				    M_DRM, M_WAITOK);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(eb->objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			DRM_LOCK_ASSERT(dev);
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							flags);
			if (ret)
				goto err;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	drm_free(cliprects, M_DRM);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     M_DRM, M_WAITOK);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}