1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include "intel_mocs.h" 36 #include <linux/shmem_fs.h> 37 #include <linux/slab.h> 38 #include <linux/swap.h> 39 #include <linux/pci.h> 40 41 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 42 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 43 static void 44 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 45 static void 46 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 47 48 static bool cpu_cache_is_coherent(struct drm_device *dev, 49 enum i915_cache_level level) 50 { 51 return HAS_LLC(dev) || level != I915_CACHE_NONE; 52 } 53 54 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 55 { 56 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 57 return true; 58 59 return obj->pin_display; 60 } 61 62 /* some bookkeeping */ 63 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 64 size_t size) 65 { 66 spin_lock(&dev_priv->mm.object_stat_lock); 67 dev_priv->mm.object_count++; 68 dev_priv->mm.object_memory += size; 69 spin_unlock(&dev_priv->mm.object_stat_lock); 70 } 71 72 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 73 size_t size) 74 { 75 spin_lock(&dev_priv->mm.object_stat_lock); 76 dev_priv->mm.object_count--; 77 dev_priv->mm.object_memory -= size; 78 spin_unlock(&dev_priv->mm.object_stat_lock); 79 } 80 81 static int 82 i915_gem_wait_for_error(struct i915_gpu_error *error) 83 { 84 int ret; 85 86 if (!i915_reset_in_progress(error)) 87 return 0; 88 89 /* 90 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 91 * userspace. If it takes that long something really bad is going on and 92 * we should simply try to bail out and fail as gracefully as possible. 93 */ 94 ret = wait_event_interruptible_timeout(error->reset_queue, 95 !i915_reset_in_progress(error), 96 10*HZ); 97 if (ret == 0) { 98 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 99 return -EIO; 100 } else if (ret < 0) { 101 return ret; 102 } else { 103 return 0; 104 } 105 } 106 107 int i915_mutex_lock_interruptible(struct drm_device *dev) 108 { 109 struct drm_i915_private *dev_priv = dev->dev_private; 110 int ret; 111 112 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 113 if (ret) 114 return ret; 115 116 ret = mutex_lock_interruptible(&dev->struct_mutex); 117 if (ret) 118 return ret; 119 120 WARN_ON(i915_verify_lists(dev)); 121 return 0; 122 } 123 124 int 125 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 126 struct drm_file *file) 127 { 128 struct drm_i915_private *dev_priv = to_i915(dev); 129 struct i915_ggtt *ggtt = &dev_priv->ggtt; 130 struct drm_i915_gem_get_aperture *args = data; 131 struct i915_vma *vma; 132 size_t pinned; 133 134 pinned = 0; 135 mutex_lock(&dev->struct_mutex); 136 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 137 if (vma->pin_count) 138 pinned += vma->node.size; 139 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 140 if (vma->pin_count) 141 pinned += vma->node.size; 142 mutex_unlock(&dev->struct_mutex); 143 144 args->aper_size = ggtt->base.total; 145 args->aper_available_size = args->aper_size - pinned; 146 147 return 0; 148 } 149 150 #if 0 151 static int 152 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 153 { 154 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 155 char *vaddr = obj->phys_handle->vaddr; 156 struct sg_table *st; 157 struct scatterlist *sg; 158 int i; 159 160 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 161 return -EINVAL; 162 163 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 164 struct page *page; 165 char *src; 166 167 page = shmem_read_mapping_page(mapping, i); 168 if (IS_ERR(page)) 169 return PTR_ERR(page); 170 171 src = kmap_atomic(page); 172 memcpy(vaddr, src, PAGE_SIZE); 173 drm_clflush_virt_range(vaddr, PAGE_SIZE); 174 kunmap_atomic(src); 175 176 put_page(page); 177 vaddr += PAGE_SIZE; 178 } 179 180 i915_gem_chipset_flush(obj->base.dev); 181 182 st = kmalloc(sizeof(*st), GFP_KERNEL); 183 if (st == NULL) 184 return -ENOMEM; 185 186 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 187 kfree(st); 188 return -ENOMEM; 189 } 190 191 sg = st->sgl; 192 sg->offset = 0; 193 sg->length = obj->base.size; 194 195 sg_dma_address(sg) = obj->phys_handle->busaddr; 196 sg_dma_len(sg) = obj->base.size; 197 198 obj->pages = st; 199 return 0; 200 } 201 202 static void 203 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 204 { 205 int ret; 206 207 BUG_ON(obj->madv == __I915_MADV_PURGED); 208 209 ret = i915_gem_object_set_to_cpu_domain(obj, true); 210 if (WARN_ON(ret)) { 211 /* In the event of a disaster, abandon all caches and 212 * hope for the best. 213 */ 214 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 215 } 216 217 if (obj->madv == I915_MADV_DONTNEED) 218 obj->dirty = 0; 219 220 if (obj->dirty) { 221 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 222 char *vaddr = obj->phys_handle->vaddr; 223 int i; 224 225 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 226 struct page *page; 227 char *dst; 228 229 page = shmem_read_mapping_page(mapping, i); 230 if (IS_ERR(page)) 231 continue; 232 233 dst = kmap_atomic(page); 234 drm_clflush_virt_range(vaddr, PAGE_SIZE); 235 memcpy(dst, vaddr, PAGE_SIZE); 236 kunmap_atomic(dst); 237 238 set_page_dirty(page); 239 if (obj->madv == I915_MADV_WILLNEED) 240 mark_page_accessed(page); 241 put_page(page); 242 vaddr += PAGE_SIZE; 243 } 244 obj->dirty = 0; 245 } 246 247 sg_free_table(obj->pages); 248 kfree(obj->pages); 249 } 250 251 static void 252 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 253 { 254 drm_pci_free(obj->base.dev, obj->phys_handle); 255 } 256 257 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 258 .get_pages = i915_gem_object_get_pages_phys, 259 .put_pages = i915_gem_object_put_pages_phys, 260 .release = i915_gem_object_release_phys, 261 }; 262 #endif 263 264 static int 265 drop_pages(struct drm_i915_gem_object *obj) 266 { 267 struct i915_vma *vma, *next; 268 int ret; 269 270 drm_gem_object_reference(&obj->base); 271 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 272 if (i915_vma_unbind(vma)) 273 break; 274 275 ret = i915_gem_object_put_pages(obj); 276 drm_gem_object_unreference(&obj->base); 277 278 return ret; 279 } 280 281 int 282 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 283 int align) 284 { 285 drm_dma_handle_t *phys; 286 int ret; 287 288 if (obj->phys_handle) { 289 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 290 return -EBUSY; 291 292 return 0; 293 } 294 295 if (obj->madv != I915_MADV_WILLNEED) 296 return -EFAULT; 297 298 #if 0 299 if (obj->base.filp == NULL) 300 return -EINVAL; 301 #endif 302 303 ret = drop_pages(obj); 304 if (ret) 305 return ret; 306 307 /* create a new object */ 308 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 309 if (!phys) 310 return -ENOMEM; 311 312 obj->phys_handle = phys; 313 #if 0 314 obj->ops = &i915_gem_phys_ops; 315 #endif 316 317 return i915_gem_object_get_pages(obj); 318 } 319 320 static int 321 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 322 struct drm_i915_gem_pwrite *args, 323 struct drm_file *file_priv) 324 { 325 struct drm_device *dev = obj->base.dev; 326 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 327 char __user *user_data = u64_to_user_ptr(args->data_ptr); 328 int ret = 0; 329 330 /* We manually control the domain here and pretend that it 331 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 332 */ 333 ret = i915_gem_object_wait_rendering(obj, false); 334 if (ret) 335 return ret; 336 337 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 338 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 339 unsigned long unwritten; 340 341 /* The physical object once assigned is fixed for the lifetime 342 * of the obj, so we can safely drop the lock and continue 343 * to access vaddr. 344 */ 345 mutex_unlock(&dev->struct_mutex); 346 unwritten = copy_from_user(vaddr, user_data, args->size); 347 mutex_lock(&dev->struct_mutex); 348 if (unwritten) { 349 ret = -EFAULT; 350 goto out; 351 } 352 } 353 354 drm_clflush_virt_range(vaddr, args->size); 355 i915_gem_chipset_flush(dev); 356 357 out: 358 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 359 return ret; 360 } 361 362 void *i915_gem_object_alloc(struct drm_device *dev) 363 { 364 return kmalloc(sizeof(struct drm_i915_gem_object), 365 M_DRM, M_WAITOK | M_ZERO); 366 } 367 368 void i915_gem_object_free(struct drm_i915_gem_object *obj) 369 { 370 kfree(obj); 371 } 372 373 static int 374 i915_gem_create(struct drm_file *file, 375 struct drm_device *dev, 376 uint64_t size, 377 uint32_t *handle_p) 378 { 379 struct drm_i915_gem_object *obj; 380 int ret; 381 u32 handle; 382 383 size = roundup(size, PAGE_SIZE); 384 if (size == 0) 385 return -EINVAL; 386 387 /* Allocate the new object */ 388 obj = i915_gem_alloc_object(dev, size); 389 if (obj == NULL) 390 return -ENOMEM; 391 392 ret = drm_gem_handle_create(file, &obj->base, &handle); 393 /* drop reference from allocate - handle holds it now */ 394 drm_gem_object_unreference_unlocked(&obj->base); 395 if (ret) 396 return ret; 397 398 *handle_p = handle; 399 return 0; 400 } 401 402 int 403 i915_gem_dumb_create(struct drm_file *file, 404 struct drm_device *dev, 405 struct drm_mode_create_dumb *args) 406 { 407 /* have to work out size/pitch and return them */ 408 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 409 args->size = args->pitch * args->height; 410 return i915_gem_create(file, dev, 411 args->size, &args->handle); 412 } 413 414 /** 415 * Creates a new mm object and returns a handle to it. 416 */ 417 int 418 i915_gem_create_ioctl(struct drm_device *dev, void *data, 419 struct drm_file *file) 420 { 421 struct drm_i915_gem_create *args = data; 422 423 return i915_gem_create(file, dev, 424 args->size, &args->handle); 425 } 426 427 static inline int 428 __copy_to_user_swizzled(char __user *cpu_vaddr, 429 const char *gpu_vaddr, int gpu_offset, 430 int length) 431 { 432 int ret, cpu_offset = 0; 433 434 while (length > 0) { 435 int cacheline_end = ALIGN(gpu_offset + 1, 64); 436 int this_length = min(cacheline_end - gpu_offset, length); 437 int swizzled_gpu_offset = gpu_offset ^ 64; 438 439 ret = __copy_to_user(cpu_vaddr + cpu_offset, 440 gpu_vaddr + swizzled_gpu_offset, 441 this_length); 442 if (ret) 443 return ret + length; 444 445 cpu_offset += this_length; 446 gpu_offset += this_length; 447 length -= this_length; 448 } 449 450 return 0; 451 } 452 453 static inline int 454 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 455 const char __user *cpu_vaddr, 456 int length) 457 { 458 int ret, cpu_offset = 0; 459 460 while (length > 0) { 461 int cacheline_end = ALIGN(gpu_offset + 1, 64); 462 int this_length = min(cacheline_end - gpu_offset, length); 463 int swizzled_gpu_offset = gpu_offset ^ 64; 464 465 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 466 cpu_vaddr + cpu_offset, 467 this_length); 468 if (ret) 469 return ret + length; 470 471 cpu_offset += this_length; 472 gpu_offset += this_length; 473 length -= this_length; 474 } 475 476 return 0; 477 } 478 479 /* 480 * Pins the specified object's pages and synchronizes the object with 481 * GPU accesses. Sets needs_clflush to non-zero if the caller should 482 * flush the object from the CPU cache. 483 */ 484 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 485 int *needs_clflush) 486 { 487 int ret; 488 489 *needs_clflush = 0; 490 491 #if 0 492 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 493 return -EINVAL; 494 #endif 495 496 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 497 /* If we're not in the cpu read domain, set ourself into the gtt 498 * read domain and manually flush cachelines (if required). This 499 * optimizes for the case when the gpu will dirty the data 500 * anyway again before the next pread happens. */ 501 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 502 obj->cache_level); 503 ret = i915_gem_object_wait_rendering(obj, true); 504 if (ret) 505 return ret; 506 } 507 508 ret = i915_gem_object_get_pages(obj); 509 if (ret) 510 return ret; 511 512 i915_gem_object_pin_pages(obj); 513 514 return ret; 515 } 516 517 /* Per-page copy function for the shmem pread fastpath. 518 * Flushes invalid cachelines before reading the target if 519 * needs_clflush is set. */ 520 static int 521 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 522 char __user *user_data, 523 bool page_do_bit17_swizzling, bool needs_clflush) 524 { 525 char *vaddr; 526 int ret; 527 528 if (unlikely(page_do_bit17_swizzling)) 529 return -EINVAL; 530 531 vaddr = kmap_atomic(page); 532 if (needs_clflush) 533 drm_clflush_virt_range(vaddr + shmem_page_offset, 534 page_length); 535 ret = __copy_to_user_inatomic(user_data, 536 vaddr + shmem_page_offset, 537 page_length); 538 kunmap_atomic(vaddr); 539 540 return ret ? -EFAULT : 0; 541 } 542 543 static void 544 shmem_clflush_swizzled_range(char *addr, unsigned long length, 545 bool swizzled) 546 { 547 if (unlikely(swizzled)) { 548 unsigned long start = (unsigned long) addr; 549 unsigned long end = (unsigned long) addr + length; 550 551 /* For swizzling simply ensure that we always flush both 552 * channels. Lame, but simple and it works. Swizzled 553 * pwrite/pread is far from a hotpath - current userspace 554 * doesn't use it at all. */ 555 start = round_down(start, 128); 556 end = round_up(end, 128); 557 558 drm_clflush_virt_range((void *)start, end - start); 559 } else { 560 drm_clflush_virt_range(addr, length); 561 } 562 563 } 564 565 /* Only difference to the fast-path function is that this can handle bit17 566 * and uses non-atomic copy and kmap functions. */ 567 static int 568 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 569 char __user *user_data, 570 bool page_do_bit17_swizzling, bool needs_clflush) 571 { 572 char *vaddr; 573 int ret; 574 575 vaddr = kmap(page); 576 if (needs_clflush) 577 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 578 page_length, 579 page_do_bit17_swizzling); 580 581 if (page_do_bit17_swizzling) 582 ret = __copy_to_user_swizzled(user_data, 583 vaddr, shmem_page_offset, 584 page_length); 585 else 586 ret = __copy_to_user(user_data, 587 vaddr + shmem_page_offset, 588 page_length); 589 kunmap(page); 590 591 return ret ? - EFAULT : 0; 592 } 593 594 static int 595 i915_gem_shmem_pread(struct drm_device *dev, 596 struct drm_i915_gem_object *obj, 597 struct drm_i915_gem_pread *args, 598 struct drm_file *file) 599 { 600 char __user *user_data; 601 ssize_t remain; 602 loff_t offset; 603 int shmem_page_offset, page_length, ret = 0; 604 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 605 int prefaulted = 0; 606 int needs_clflush = 0; 607 struct sg_page_iter sg_iter; 608 609 user_data = u64_to_user_ptr(args->data_ptr); 610 remain = args->size; 611 612 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 613 614 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 615 if (ret) 616 return ret; 617 618 offset = args->offset; 619 620 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 621 offset >> PAGE_SHIFT) { 622 struct vm_page *page = sg_page_iter_page(&sg_iter); 623 624 if (remain <= 0) 625 break; 626 627 /* Operation in this page 628 * 629 * shmem_page_offset = offset within page in shmem file 630 * page_length = bytes to copy for this page 631 */ 632 shmem_page_offset = offset_in_page(offset); 633 page_length = remain; 634 if ((shmem_page_offset + page_length) > PAGE_SIZE) 635 page_length = PAGE_SIZE - shmem_page_offset; 636 637 page_do_bit17_swizzling = obj_do_bit17_swizzling && 638 (page_to_phys(page) & (1 << 17)) != 0; 639 640 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 641 user_data, page_do_bit17_swizzling, 642 needs_clflush); 643 if (ret == 0) 644 goto next_page; 645 646 mutex_unlock(&dev->struct_mutex); 647 648 if (likely(!i915.prefault_disable) && !prefaulted) { 649 ret = fault_in_multipages_writeable(user_data, remain); 650 /* Userspace is tricking us, but we've already clobbered 651 * its pages with the prefault and promised to write the 652 * data up to the first fault. Hence ignore any errors 653 * and just continue. */ 654 (void)ret; 655 prefaulted = 1; 656 } 657 658 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 659 user_data, page_do_bit17_swizzling, 660 needs_clflush); 661 662 mutex_lock(&dev->struct_mutex); 663 664 if (ret) 665 goto out; 666 667 next_page: 668 remain -= page_length; 669 user_data += page_length; 670 offset += page_length; 671 } 672 673 out: 674 i915_gem_object_unpin_pages(obj); 675 676 return ret; 677 } 678 679 /** 680 * Reads data from the object referenced by handle. 681 * 682 * On error, the contents of *data are undefined. 683 */ 684 int 685 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 686 struct drm_file *file) 687 { 688 struct drm_i915_gem_pread *args = data; 689 struct drm_i915_gem_object *obj; 690 int ret = 0; 691 692 if (args->size == 0) 693 return 0; 694 695 #if 0 696 if (!access_ok(VERIFY_WRITE, 697 u64_to_user_ptr(args->data_ptr), 698 args->size)) 699 return -EFAULT; 700 #endif 701 702 ret = i915_mutex_lock_interruptible(dev); 703 if (ret) 704 return ret; 705 706 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 707 if (&obj->base == NULL) { 708 ret = -ENOENT; 709 goto unlock; 710 } 711 712 /* Bounds check source. */ 713 if (args->offset > obj->base.size || 714 args->size > obj->base.size - args->offset) { 715 ret = -EINVAL; 716 goto out; 717 } 718 719 /* prime objects have no backing filp to GEM pread/pwrite 720 * pages from. 721 */ 722 723 trace_i915_gem_object_pread(obj, args->offset, args->size); 724 725 ret = i915_gem_shmem_pread(dev, obj, args, file); 726 727 out: 728 drm_gem_object_unreference(&obj->base); 729 unlock: 730 mutex_unlock(&dev->struct_mutex); 731 return ret; 732 } 733 734 /* This is the fast write path which cannot handle 735 * page faults in the source data 736 */ 737 738 static inline int 739 fast_user_write(struct io_mapping *mapping, 740 loff_t page_base, int page_offset, 741 char __user *user_data, 742 int length) 743 { 744 void __iomem *vaddr_atomic; 745 void *vaddr; 746 unsigned long unwritten; 747 748 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 749 /* We can use the cpu mem copy function because this is X86. */ 750 vaddr = (char __force*)vaddr_atomic + page_offset; 751 unwritten = __copy_from_user_inatomic_nocache(vaddr, 752 user_data, length); 753 io_mapping_unmap_atomic(vaddr_atomic); 754 return unwritten; 755 } 756 757 /** 758 * This is the fast pwrite path, where we copy the data directly from the 759 * user into the GTT, uncached. 760 */ 761 static int 762 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 763 struct drm_i915_gem_object *obj, 764 struct drm_i915_gem_pwrite *args, 765 struct drm_file *file) 766 { 767 struct drm_i915_private *dev_priv = to_i915(dev); 768 struct i915_ggtt *ggtt = &dev_priv->ggtt; 769 ssize_t remain; 770 loff_t offset, page_base; 771 char __user *user_data; 772 int page_offset, page_length, ret; 773 774 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 775 if (ret) 776 goto out; 777 778 ret = i915_gem_object_set_to_gtt_domain(obj, true); 779 if (ret) 780 goto out_unpin; 781 782 ret = i915_gem_object_put_fence(obj); 783 if (ret) 784 goto out_unpin; 785 786 user_data = u64_to_user_ptr(args->data_ptr); 787 remain = args->size; 788 789 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 790 791 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 792 793 while (remain > 0) { 794 /* Operation in this page 795 * 796 * page_base = page offset within aperture 797 * page_offset = offset within page 798 * page_length = bytes to copy for this page 799 */ 800 page_base = offset & ~PAGE_MASK; 801 page_offset = offset_in_page(offset); 802 page_length = remain; 803 if ((page_offset + remain) > PAGE_SIZE) 804 page_length = PAGE_SIZE - page_offset; 805 806 /* If we get a fault while copying data, then (presumably) our 807 * source page isn't available. Return the error and we'll 808 * retry in the slow path. 809 */ 810 if (fast_user_write(ggtt->mappable, page_base, 811 page_offset, user_data, page_length)) { 812 ret = -EFAULT; 813 goto out_flush; 814 } 815 816 remain -= page_length; 817 user_data += page_length; 818 offset += page_length; 819 } 820 821 out_flush: 822 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 823 out_unpin: 824 i915_gem_object_ggtt_unpin(obj); 825 out: 826 return ret; 827 } 828 829 /* Per-page copy function for the shmem pwrite fastpath. 830 * Flushes invalid cachelines before writing to the target if 831 * needs_clflush_before is set and flushes out any written cachelines after 832 * writing if needs_clflush is set. */ 833 static int 834 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 835 char __user *user_data, 836 bool page_do_bit17_swizzling, 837 bool needs_clflush_before, 838 bool needs_clflush_after) 839 { 840 char *vaddr; 841 int ret; 842 843 if (unlikely(page_do_bit17_swizzling)) 844 return -EINVAL; 845 846 vaddr = kmap_atomic(page); 847 if (needs_clflush_before) 848 drm_clflush_virt_range(vaddr + shmem_page_offset, 849 page_length); 850 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 851 user_data, page_length); 852 if (needs_clflush_after) 853 drm_clflush_virt_range(vaddr + shmem_page_offset, 854 page_length); 855 kunmap_atomic(vaddr); 856 857 return ret ? -EFAULT : 0; 858 } 859 860 /* Only difference to the fast-path function is that this can handle bit17 861 * and uses non-atomic copy and kmap functions. */ 862 static int 863 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 864 char __user *user_data, 865 bool page_do_bit17_swizzling, 866 bool needs_clflush_before, 867 bool needs_clflush_after) 868 { 869 char *vaddr; 870 int ret; 871 872 vaddr = kmap(page); 873 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 874 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 875 page_length, 876 page_do_bit17_swizzling); 877 if (page_do_bit17_swizzling) 878 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 879 user_data, 880 page_length); 881 else 882 ret = __copy_from_user(vaddr + shmem_page_offset, 883 user_data, 884 page_length); 885 if (needs_clflush_after) 886 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 887 page_length, 888 page_do_bit17_swizzling); 889 kunmap(page); 890 891 return ret ? -EFAULT : 0; 892 } 893 894 static int 895 i915_gem_shmem_pwrite(struct drm_device *dev, 896 struct drm_i915_gem_object *obj, 897 struct drm_i915_gem_pwrite *args, 898 struct drm_file *file) 899 { 900 ssize_t remain; 901 loff_t offset; 902 char __user *user_data; 903 int shmem_page_offset, page_length, ret = 0; 904 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 905 int hit_slowpath = 0; 906 int needs_clflush_after = 0; 907 int needs_clflush_before = 0; 908 struct sg_page_iter sg_iter; 909 910 user_data = u64_to_user_ptr(args->data_ptr); 911 remain = args->size; 912 913 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 914 915 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 916 /* If we're not in the cpu write domain, set ourself into the gtt 917 * write domain and manually flush cachelines (if required). This 918 * optimizes for the case when the gpu will use the data 919 * right away and we therefore have to clflush anyway. */ 920 needs_clflush_after = cpu_write_needs_clflush(obj); 921 ret = i915_gem_object_wait_rendering(obj, false); 922 if (ret) 923 return ret; 924 } 925 /* Same trick applies to invalidate partially written cachelines read 926 * before writing. */ 927 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 928 needs_clflush_before = 929 !cpu_cache_is_coherent(dev, obj->cache_level); 930 931 ret = i915_gem_object_get_pages(obj); 932 if (ret) 933 return ret; 934 935 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 936 937 i915_gem_object_pin_pages(obj); 938 939 offset = args->offset; 940 obj->dirty = 1; 941 942 VM_OBJECT_LOCK(obj->base.vm_obj); 943 vm_object_pip_add(obj->base.vm_obj, 1); 944 945 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 946 offset >> PAGE_SHIFT) { 947 struct vm_page *page = sg_page_iter_page(&sg_iter); 948 int partial_cacheline_write; 949 950 if (remain <= 0) 951 break; 952 953 /* Operation in this page 954 * 955 * shmem_page_offset = offset within page in shmem file 956 * page_length = bytes to copy for this page 957 */ 958 shmem_page_offset = offset_in_page(offset); 959 960 page_length = remain; 961 if ((shmem_page_offset + page_length) > PAGE_SIZE) 962 page_length = PAGE_SIZE - shmem_page_offset; 963 964 /* If we don't overwrite a cacheline completely we need to be 965 * careful to have up-to-date data by first clflushing. Don't 966 * overcomplicate things and flush the entire patch. */ 967 partial_cacheline_write = needs_clflush_before && 968 ((shmem_page_offset | page_length) 969 & (cpu_clflush_line_size - 1)); 970 971 page_do_bit17_swizzling = obj_do_bit17_swizzling && 972 (page_to_phys(page) & (1 << 17)) != 0; 973 974 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 975 user_data, page_do_bit17_swizzling, 976 partial_cacheline_write, 977 needs_clflush_after); 978 if (ret == 0) 979 goto next_page; 980 981 hit_slowpath = 1; 982 mutex_unlock(&dev->struct_mutex); 983 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 984 user_data, page_do_bit17_swizzling, 985 partial_cacheline_write, 986 needs_clflush_after); 987 988 mutex_lock(&dev->struct_mutex); 989 990 if (ret) 991 goto out; 992 993 next_page: 994 remain -= page_length; 995 user_data += page_length; 996 offset += page_length; 997 } 998 vm_object_pip_wakeup(obj->base.vm_obj); 999 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1000 1001 out: 1002 i915_gem_object_unpin_pages(obj); 1003 1004 if (hit_slowpath) { 1005 /* 1006 * Fixup: Flush cpu caches in case we didn't flush the dirty 1007 * cachelines in-line while writing and the object moved 1008 * out of the cpu write domain while we've dropped the lock. 1009 */ 1010 if (!needs_clflush_after && 1011 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1012 if (i915_gem_clflush_object(obj, obj->pin_display)) 1013 needs_clflush_after = true; 1014 } 1015 } 1016 1017 if (needs_clflush_after) 1018 i915_gem_chipset_flush(dev); 1019 else 1020 obj->cache_dirty = true; 1021 1022 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1023 return ret; 1024 } 1025 1026 /** 1027 * Writes data to the object referenced by handle. 1028 * 1029 * On error, the contents of the buffer that were to be modified are undefined. 1030 */ 1031 int 1032 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1033 struct drm_file *file) 1034 { 1035 struct drm_i915_private *dev_priv = dev->dev_private; 1036 struct drm_i915_gem_pwrite *args = data; 1037 struct drm_i915_gem_object *obj; 1038 int ret; 1039 1040 if (args->size == 0) 1041 return 0; 1042 1043 #if 0 1044 if (!access_ok(VERIFY_READ, 1045 u64_to_user_ptr(args->data_ptr), 1046 args->size)) 1047 return -EFAULT; 1048 #endif 1049 1050 if (likely(!i915.prefault_disable)) { 1051 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1052 args->size); 1053 if (ret) 1054 return -EFAULT; 1055 } 1056 1057 intel_runtime_pm_get(dev_priv); 1058 1059 ret = i915_mutex_lock_interruptible(dev); 1060 if (ret) 1061 goto put_rpm; 1062 1063 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1064 if (&obj->base == NULL) { 1065 ret = -ENOENT; 1066 goto unlock; 1067 } 1068 1069 /* Bounds check destination. */ 1070 if (args->offset > obj->base.size || 1071 args->size > obj->base.size - args->offset) { 1072 ret = -EINVAL; 1073 goto out; 1074 } 1075 1076 /* prime objects have no backing filp to GEM pread/pwrite 1077 * pages from. 1078 */ 1079 1080 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1081 1082 ret = -EFAULT; 1083 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1084 * it would end up going through the fenced access, and we'll get 1085 * different detiling behavior between reading and writing. 1086 * pread/pwrite currently are reading and writing from the CPU 1087 * perspective, requiring manual detiling by the client. 1088 */ 1089 if (obj->tiling_mode == I915_TILING_NONE && 1090 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1091 cpu_write_needs_clflush(obj)) { 1092 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1093 /* Note that the gtt paths might fail with non-page-backed user 1094 * pointers (e.g. gtt mappings when moving data between 1095 * textures). Fallback to the shmem path in that case. */ 1096 } 1097 1098 if (ret == -EFAULT || ret == -ENOSPC) { 1099 if (obj->phys_handle) 1100 ret = i915_gem_phys_pwrite(obj, args, file); 1101 else 1102 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1103 } 1104 1105 out: 1106 drm_gem_object_unreference(&obj->base); 1107 unlock: 1108 mutex_unlock(&dev->struct_mutex); 1109 put_rpm: 1110 intel_runtime_pm_put(dev_priv); 1111 1112 return ret; 1113 } 1114 1115 static int 1116 i915_gem_check_wedge(unsigned reset_counter, bool interruptible) 1117 { 1118 if (__i915_terminally_wedged(reset_counter)) 1119 return -EIO; 1120 1121 if (__i915_reset_in_progress(reset_counter)) { 1122 /* Non-interruptible callers can't handle -EAGAIN, hence return 1123 * -EIO unconditionally for these. */ 1124 if (!interruptible) 1125 return -EIO; 1126 1127 return -EAGAIN; 1128 } 1129 1130 return 0; 1131 } 1132 1133 static void fake_irq(unsigned long data) 1134 { 1135 wakeup_one((void *)data); 1136 } 1137 1138 static bool missed_irq(struct drm_i915_private *dev_priv, 1139 struct intel_engine_cs *engine) 1140 { 1141 return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings); 1142 } 1143 1144 #if 0 1145 static int __i915_spin_request(struct drm_i915_gem_request *req, int state) 1146 { 1147 unsigned long timeout; 1148 unsigned cpu; 1149 1150 /* When waiting for high frequency requests, e.g. during synchronous 1151 * rendering split between the CPU and GPU, the finite amount of time 1152 * required to set up the irq and wait upon it limits the response 1153 * rate. By busywaiting on the request completion for a short while we 1154 * can service the high frequency waits as quick as possible. However, 1155 * if it is a slow request, we want to sleep as quickly as possible. 1156 * The tradeoff between waiting and sleeping is roughly the time it 1157 * takes to sleep on a request, on the order of a microsecond. 1158 */ 1159 1160 if (req->engine->irq_refcount) 1161 return -EBUSY; 1162 1163 /* Only spin if we know the GPU is processing this request */ 1164 if (!i915_gem_request_started(req, true)) 1165 return -EAGAIN; 1166 1167 timeout = local_clock_us(&cpu) + 5; 1168 while (!need_resched()) { 1169 if (i915_gem_request_completed(req, true)) 1170 return 0; 1171 1172 if (signal_pending_state(state, current)) 1173 break; 1174 1175 if (busywait_stop(timeout, cpu)) 1176 break; 1177 1178 cpu_relax_lowlatency(); 1179 } 1180 1181 if (i915_gem_request_completed(req, false)) 1182 return 0; 1183 1184 return -EAGAIN; 1185 } 1186 #endif 1187 1188 /** 1189 * __i915_wait_request - wait until execution of request has finished 1190 * @req: duh! 1191 * @interruptible: do an interruptible wait (normally yes) 1192 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1193 * 1194 * Note: It is of utmost importance that the passed in seqno and reset_counter 1195 * values have been read by the caller in an smp safe manner. Where read-side 1196 * locks are involved, it is sufficient to read the reset_counter before 1197 * unlocking the lock that protects the seqno. For lockless tricks, the 1198 * reset_counter _must_ be read before, and an appropriate smp_rmb must be 1199 * inserted. 1200 * 1201 * Returns 0 if the request was found within the alloted time. Else returns the 1202 * errno with remaining time filled in timeout argument. 1203 */ 1204 int __i915_wait_request(struct drm_i915_gem_request *req, 1205 bool interruptible, 1206 s64 *timeout, 1207 struct intel_rps_client *rps) 1208 { 1209 struct intel_engine_cs *engine = i915_gem_request_get_engine(req); 1210 struct drm_device *dev = engine->dev; 1211 struct drm_i915_private *dev_priv = dev->dev_private; 1212 const bool irq_test_in_progress = 1213 ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); 1214 unsigned long timeout_expire; 1215 s64 before = 0; /* Only to silence a compiler warning. */ 1216 int ret, sl_timeout = 1; 1217 1218 WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); 1219 1220 if (list_empty(&req->list)) 1221 return 0; 1222 1223 if (i915_gem_request_completed(req, true)) 1224 return 0; 1225 1226 timeout_expire = 0; 1227 if (timeout) { 1228 if (WARN_ON(*timeout < 0)) 1229 return -EINVAL; 1230 1231 if (*timeout == 0) 1232 return -ETIME; 1233 1234 timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); 1235 1236 /* 1237 * Record current time in case interrupted by signal, or wedged. 1238 */ 1239 before = ktime_get_raw_ns(); 1240 } 1241 1242 if (INTEL_INFO(dev_priv)->gen >= 6) 1243 gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); 1244 1245 trace_i915_gem_request_wait_begin(req); 1246 1247 /* Optimistic spin for the next jiffie before touching IRQs */ 1248 #if 0 1249 ret = __i915_spin_request(req); 1250 if (ret == 0) 1251 goto out; 1252 #endif 1253 1254 if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine))) { 1255 ret = -ENODEV; 1256 goto out; 1257 } 1258 1259 lockmgr(&engine->irq_queue.lock, LK_EXCLUSIVE); 1260 for (;;) { 1261 struct timer_list timer; 1262 1263 /* We need to check whether any gpu reset happened in between 1264 * the request being submitted and now. If a reset has occurred, 1265 * the request is effectively complete (we either are in the 1266 * process of or have discarded the rendering and completely 1267 * reset the GPU. The results of the request are lost and we 1268 * are free to continue on with the original operation. 1269 */ 1270 if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) { 1271 ret = 0; 1272 break; 1273 } 1274 1275 if (i915_gem_request_completed(req, false)) { 1276 ret = 0; 1277 break; 1278 } 1279 1280 if (interruptible && signal_pending(curthread->td_lwp)) { 1281 ret = -ERESTARTSYS; 1282 break; 1283 } 1284 1285 if (timeout && time_after_eq(jiffies, timeout_expire)) { 1286 ret = -ETIME; 1287 break; 1288 } 1289 1290 timer.function = NULL; 1291 if (timeout || missed_irq(dev_priv, engine)) { 1292 unsigned long expire; 1293 1294 setup_timer_on_stack(&timer, fake_irq, (unsigned long)&engine->irq_queue); 1295 expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire; 1296 sl_timeout = expire - jiffies; 1297 if (sl_timeout < 1) 1298 sl_timeout = 1; 1299 mod_timer(&timer, expire); 1300 } 1301 1302 #if 0 1303 io_schedule(); 1304 #endif 1305 1306 if (timer.function) { 1307 del_singleshot_timer_sync(&timer); 1308 destroy_timer_on_stack(&timer); 1309 } 1310 1311 lksleep(&engine->irq_queue, &engine->irq_queue.lock, 1312 interruptible ? PCATCH : 0, "lwe", sl_timeout); 1313 } 1314 lockmgr(&engine->irq_queue.lock, LK_RELEASE); 1315 if (!irq_test_in_progress) 1316 engine->irq_put(engine); 1317 1318 out: 1319 trace_i915_gem_request_wait_end(req); 1320 1321 if (timeout) { 1322 s64 tres = *timeout - (ktime_get_raw_ns() - before); 1323 1324 *timeout = tres < 0 ? 0 : tres; 1325 1326 /* 1327 * Apparently ktime isn't accurate enough and occasionally has a 1328 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 1329 * things up to make the test happy. We allow up to 1 jiffy. 1330 * 1331 * This is a regrssion from the timespec->ktime conversion. 1332 */ 1333 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1334 *timeout = 0; 1335 } 1336 1337 return ret; 1338 } 1339 1340 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1341 struct drm_file *file) 1342 { 1343 struct drm_i915_file_private *file_priv; 1344 1345 WARN_ON(!req || !file || req->file_priv); 1346 1347 if (!req || !file) 1348 return -EINVAL; 1349 1350 if (req->file_priv) 1351 return -EINVAL; 1352 1353 file_priv = file->driver_priv; 1354 1355 spin_lock(&file_priv->mm.lock); 1356 req->file_priv = file_priv; 1357 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1358 spin_unlock(&file_priv->mm.lock); 1359 1360 req->pid = curproc->p_pid; 1361 1362 return 0; 1363 } 1364 1365 static inline void 1366 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1367 { 1368 struct drm_i915_file_private *file_priv = request->file_priv; 1369 1370 if (!file_priv) 1371 return; 1372 1373 spin_lock(&file_priv->mm.lock); 1374 list_del(&request->client_list); 1375 request->file_priv = NULL; 1376 spin_unlock(&file_priv->mm.lock); 1377 1378 #if 0 1379 put_pid(request->pid); 1380 request->pid = NULL; 1381 #endif 1382 } 1383 1384 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1385 { 1386 trace_i915_gem_request_retire(request); 1387 1388 /* We know the GPU must have read the request to have 1389 * sent us the seqno + interrupt, so use the position 1390 * of tail of the request to update the last known position 1391 * of the GPU head. 1392 * 1393 * Note this requires that we are always called in request 1394 * completion order. 1395 */ 1396 request->ringbuf->last_retired_head = request->postfix; 1397 1398 list_del_init(&request->list); 1399 i915_gem_request_remove_from_client(request); 1400 1401 i915_gem_request_unreference(request); 1402 } 1403 1404 static void 1405 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1406 { 1407 struct intel_engine_cs *engine = req->engine; 1408 struct drm_i915_gem_request *tmp; 1409 1410 lockdep_assert_held(&engine->dev->struct_mutex); 1411 1412 if (list_empty(&req->list)) 1413 return; 1414 1415 do { 1416 tmp = list_first_entry(&engine->request_list, 1417 typeof(*tmp), list); 1418 1419 i915_gem_request_retire(tmp); 1420 } while (tmp != req); 1421 1422 WARN_ON(i915_verify_lists(engine->dev)); 1423 } 1424 1425 /** 1426 * Waits for a request to be signaled, and cleans up the 1427 * request and object lists appropriately for that event. 1428 */ 1429 int 1430 i915_wait_request(struct drm_i915_gem_request *req) 1431 { 1432 struct drm_i915_private *dev_priv = req->i915; 1433 bool interruptible; 1434 int ret; 1435 1436 interruptible = dev_priv->mm.interruptible; 1437 1438 BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); 1439 1440 ret = __i915_wait_request(req, interruptible, NULL, NULL); 1441 if (ret) 1442 return ret; 1443 1444 /* If the GPU hung, we want to keep the requests to find the guilty. */ 1445 if (req->reset_counter == i915_reset_counter(&dev_priv->gpu_error)) 1446 __i915_gem_request_retire__upto(req); 1447 1448 return 0; 1449 } 1450 1451 /** 1452 * Ensures that all rendering to the object has completed and the object is 1453 * safe to unbind from the GTT or access from the CPU. 1454 */ 1455 int 1456 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1457 bool readonly) 1458 { 1459 int ret, i; 1460 1461 if (!obj->active) 1462 return 0; 1463 1464 if (readonly) { 1465 if (obj->last_write_req != NULL) { 1466 ret = i915_wait_request(obj->last_write_req); 1467 if (ret) 1468 return ret; 1469 1470 i = obj->last_write_req->engine->id; 1471 if (obj->last_read_req[i] == obj->last_write_req) 1472 i915_gem_object_retire__read(obj, i); 1473 else 1474 i915_gem_object_retire__write(obj); 1475 } 1476 } else { 1477 for (i = 0; i < I915_NUM_ENGINES; i++) { 1478 if (obj->last_read_req[i] == NULL) 1479 continue; 1480 1481 ret = i915_wait_request(obj->last_read_req[i]); 1482 if (ret) 1483 return ret; 1484 1485 i915_gem_object_retire__read(obj, i); 1486 } 1487 GEM_BUG_ON(obj->active); 1488 } 1489 1490 return 0; 1491 } 1492 1493 static void 1494 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1495 struct drm_i915_gem_request *req) 1496 { 1497 int ring = req->engine->id; 1498 1499 if (obj->last_read_req[ring] == req) 1500 i915_gem_object_retire__read(obj, ring); 1501 else if (obj->last_write_req == req) 1502 i915_gem_object_retire__write(obj); 1503 1504 if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error)) 1505 __i915_gem_request_retire__upto(req); 1506 } 1507 1508 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1509 * as the object state may change during this call. 1510 */ 1511 static __must_check int 1512 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1513 struct intel_rps_client *rps, 1514 bool readonly) 1515 { 1516 struct drm_device *dev = obj->base.dev; 1517 struct drm_i915_private *dev_priv = dev->dev_private; 1518 struct drm_i915_gem_request *requests[I915_NUM_ENGINES]; 1519 int ret, i, n = 0; 1520 1521 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1522 BUG_ON(!dev_priv->mm.interruptible); 1523 1524 if (!obj->active) 1525 return 0; 1526 1527 if (readonly) { 1528 struct drm_i915_gem_request *req; 1529 1530 req = obj->last_write_req; 1531 if (req == NULL) 1532 return 0; 1533 1534 requests[n++] = i915_gem_request_reference(req); 1535 } else { 1536 for (i = 0; i < I915_NUM_ENGINES; i++) { 1537 struct drm_i915_gem_request *req; 1538 1539 req = obj->last_read_req[i]; 1540 if (req == NULL) 1541 continue; 1542 1543 requests[n++] = i915_gem_request_reference(req); 1544 } 1545 } 1546 1547 mutex_unlock(&dev->struct_mutex); 1548 ret = 0; 1549 for (i = 0; ret == 0 && i < n; i++) 1550 ret = __i915_wait_request(requests[i], true, NULL, rps); 1551 mutex_lock(&dev->struct_mutex); 1552 1553 for (i = 0; i < n; i++) { 1554 if (ret == 0) 1555 i915_gem_object_retire_request(obj, requests[i]); 1556 i915_gem_request_unreference(requests[i]); 1557 } 1558 1559 return ret; 1560 } 1561 1562 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1563 { 1564 struct drm_i915_file_private *fpriv = file->driver_priv; 1565 return &fpriv->rps; 1566 } 1567 1568 /** 1569 * Called when user space prepares to use an object with the CPU, either 1570 * through the mmap ioctl's mapping or a GTT mapping. 1571 */ 1572 int 1573 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1574 struct drm_file *file) 1575 { 1576 struct drm_i915_gem_set_domain *args = data; 1577 struct drm_i915_gem_object *obj; 1578 uint32_t read_domains = args->read_domains; 1579 uint32_t write_domain = args->write_domain; 1580 int ret; 1581 1582 /* Only handle setting domains to types used by the CPU. */ 1583 if (write_domain & I915_GEM_GPU_DOMAINS) 1584 return -EINVAL; 1585 1586 if (read_domains & I915_GEM_GPU_DOMAINS) 1587 return -EINVAL; 1588 1589 /* Having something in the write domain implies it's in the read 1590 * domain, and only that read domain. Enforce that in the request. 1591 */ 1592 if (write_domain != 0 && read_domains != write_domain) 1593 return -EINVAL; 1594 1595 ret = i915_mutex_lock_interruptible(dev); 1596 if (ret) 1597 return ret; 1598 1599 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1600 if (&obj->base == NULL) { 1601 ret = -ENOENT; 1602 goto unlock; 1603 } 1604 1605 /* Try to flush the object off the GPU without holding the lock. 1606 * We will repeat the flush holding the lock in the normal manner 1607 * to catch cases where we are gazumped. 1608 */ 1609 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1610 to_rps_client(file), 1611 !write_domain); 1612 if (ret) 1613 goto unref; 1614 1615 if (read_domains & I915_GEM_DOMAIN_GTT) 1616 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1617 else 1618 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1619 1620 if (write_domain != 0) 1621 intel_fb_obj_invalidate(obj, 1622 write_domain == I915_GEM_DOMAIN_GTT ? 1623 ORIGIN_GTT : ORIGIN_CPU); 1624 1625 unref: 1626 drm_gem_object_unreference(&obj->base); 1627 unlock: 1628 mutex_unlock(&dev->struct_mutex); 1629 return ret; 1630 } 1631 1632 /** 1633 * Called when user space has done writes to this buffer 1634 */ 1635 int 1636 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1637 struct drm_file *file) 1638 { 1639 struct drm_i915_gem_sw_finish *args = data; 1640 struct drm_i915_gem_object *obj; 1641 int ret = 0; 1642 1643 ret = i915_mutex_lock_interruptible(dev); 1644 if (ret) 1645 return ret; 1646 1647 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 1648 if (&obj->base == NULL) { 1649 ret = -ENOENT; 1650 goto unlock; 1651 } 1652 1653 /* Pinned buffers may be scanout, so flush the cache */ 1654 if (obj->pin_display) 1655 i915_gem_object_flush_cpu_write_domain(obj); 1656 1657 drm_gem_object_unreference(&obj->base); 1658 unlock: 1659 mutex_unlock(&dev->struct_mutex); 1660 return ret; 1661 } 1662 1663 /** 1664 * Maps the contents of an object, returning the address it is mapped 1665 * into. 1666 * 1667 * While the mapping holds a reference on the contents of the object, it doesn't 1668 * imply a ref on the object itself. 1669 * 1670 * IMPORTANT: 1671 * 1672 * DRM driver writers who look a this function as an example for how to do GEM 1673 * mmap support, please don't implement mmap support like here. The modern way 1674 * to implement DRM mmap support is with an mmap offset ioctl (like 1675 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1676 * That way debug tooling like valgrind will understand what's going on, hiding 1677 * the mmap call in a driver private ioctl will break that. The i915 driver only 1678 * does cpu mmaps this way because we didn't know better. 1679 */ 1680 int 1681 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1682 struct drm_file *file) 1683 { 1684 struct drm_i915_gem_mmap *args = data; 1685 struct drm_gem_object *obj; 1686 unsigned long addr; 1687 1688 struct proc *p = curproc; 1689 vm_map_t map = &p->p_vmspace->vm_map; 1690 vm_size_t size; 1691 int error = 0, rv; 1692 1693 if (args->flags & ~(I915_MMAP_WC)) 1694 return -EINVAL; 1695 1696 obj = drm_gem_object_lookup(file, args->handle); 1697 if (obj == NULL) 1698 return -ENOENT; 1699 1700 if (args->size == 0) 1701 goto out; 1702 1703 size = round_page(args->size); 1704 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1705 error = -ENOMEM; 1706 goto out; 1707 } 1708 1709 /* prime objects have no backing filp to GEM mmap 1710 * pages from. 1711 */ 1712 1713 /* 1714 * Call hint to ensure that NULL is not returned as a valid address 1715 * and to reduce vm_map traversals. XXX causes instability, use a 1716 * fixed low address as the start point instead to avoid the NULL 1717 * return issue. 1718 */ 1719 1720 addr = PAGE_SIZE; 1721 1722 /* 1723 * Use 256KB alignment. It is unclear why this matters for a 1724 * virtual address but it appears to fix a number of application/X 1725 * crashes and kms console switching is much faster. 1726 */ 1727 vm_object_hold(obj->vm_obj); 1728 vm_object_reference_locked(obj->vm_obj); 1729 vm_object_drop(obj->vm_obj); 1730 1731 rv = vm_map_find(map, obj->vm_obj, NULL, 1732 args->offset, &addr, args->size, 1733 256 * 1024, /* align */ 1734 TRUE, /* fitit */ 1735 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM, 1736 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1737 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1738 MAP_SHARED /* cow */); 1739 if (rv != KERN_SUCCESS) { 1740 vm_object_deallocate(obj->vm_obj); 1741 error = -vm_mmap_to_errno(rv); 1742 } else { 1743 args->addr_ptr = (uint64_t)addr; 1744 } 1745 out: 1746 drm_gem_object_unreference(obj); 1747 return (error); 1748 } 1749 1750 /** 1751 * i915_gem_fault - fault a page into the GTT 1752 * 1753 * vm_obj is locked on entry and expected to be locked on return. 1754 * 1755 * The vm_pager has placemarked the object with an anonymous memory page 1756 * which we must replace atomically to avoid races against concurrent faults 1757 * on the same page. XXX we currently are unable to do this atomically. 1758 * 1759 * If we are to return an error we should not touch the anonymous page, 1760 * the caller will deallocate it. 1761 * 1762 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1763 * in that case. Release all resources and wait 1 tick before retrying. 1764 * This is a huge problem which needs to be fixed by getting rid of most 1765 * of the interruptability. The linux code does not retry but does appear 1766 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1767 * to be able to retry. 1768 * 1769 * -- 1770 * @vma: VMA in question 1771 * @vmf: fault info 1772 * 1773 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1774 * from userspace. The fault handler takes care of binding the object to 1775 * the GTT (if needed), allocating and programming a fence register (again, 1776 * only if needed based on whether the old reg is still valid or the object 1777 * is tiled) and inserting a new PTE into the faulting process. 1778 * 1779 * Note that the faulting process may involve evicting existing objects 1780 * from the GTT and/or fence registers to make room. So performance may 1781 * suffer if the GTT working set is large or there are few fence registers 1782 * left. 1783 * 1784 * vm_obj is locked on entry and expected to be locked on return. The VM 1785 * pager has placed an anonymous memory page at (obj,offset) which we have 1786 * to replace. 1787 */ 1788 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1789 { 1790 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1791 struct drm_device *dev = obj->base.dev; 1792 struct drm_i915_private *dev_priv = to_i915(dev); 1793 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1794 struct i915_ggtt_view view = i915_ggtt_view_normal; 1795 unsigned long page_offset; 1796 vm_page_t m, oldm = NULL; 1797 int ret = 0; 1798 bool write = !!(prot & VM_PROT_WRITE); 1799 1800 intel_runtime_pm_get(dev_priv); 1801 1802 /* We don't use vmf->pgoff since that has the fake offset */ 1803 page_offset = (unsigned long)offset; 1804 1805 retry: 1806 ret = i915_mutex_lock_interruptible(dev); 1807 if (ret) 1808 goto out; 1809 1810 trace_i915_gem_object_fault(obj, page_offset, true, write); 1811 1812 /* Try to flush the object off the GPU first without holding the lock. 1813 * Upon reacquiring the lock, we will perform our sanity checks and then 1814 * repeat the flush holding the lock in the normal manner to catch cases 1815 * where we are gazumped. 1816 */ 1817 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1818 if (ret) 1819 goto unlock; 1820 1821 /* Access to snoopable pages through the GTT is incoherent. */ 1822 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1823 ret = -EFAULT; 1824 goto unlock; 1825 } 1826 1827 /* Use a partial view if the object is bigger than the aperture. */ 1828 if (obj->base.size >= ggtt->mappable_end && 1829 obj->tiling_mode == I915_TILING_NONE) { 1830 #if 0 1831 static const unsigned int chunk_size = 256; // 1 MiB 1832 1833 memset(&view, 0, sizeof(view)); 1834 view.type = I915_GGTT_VIEW_PARTIAL; 1835 view.params.partial.offset = rounddown(page_offset, chunk_size); 1836 view.params.partial.size = 1837 min_t(unsigned int, 1838 chunk_size, 1839 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1840 view.params.partial.offset); 1841 #endif 1842 } 1843 1844 /* Now pin it into the GTT if needed */ 1845 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1846 if (ret) 1847 goto unlock; 1848 1849 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1850 if (ret) 1851 goto unpin; 1852 1853 ret = i915_gem_object_get_fence(obj); 1854 if (ret) 1855 goto unpin; 1856 1857 /* 1858 * START FREEBSD MAGIC 1859 * 1860 * Add a pip count to avoid destruction and certain other 1861 * complex operations (such as collapses?) while unlocked. 1862 */ 1863 vm_object_pip_add(vm_obj, 1); 1864 1865 /* 1866 * XXX We must currently remove the placeholder page now to avoid 1867 * a deadlock against a concurrent i915_gem_release_mmap(). 1868 * Otherwise concurrent operation will block on the busy page 1869 * while holding locks which we need to obtain. 1870 */ 1871 if (*mres != NULL) { 1872 oldm = *mres; 1873 if ((oldm->flags & PG_BUSY) == 0) 1874 kprintf("i915_gem_fault: Page was not busy\n"); 1875 else 1876 vm_page_remove(oldm); 1877 *mres = NULL; 1878 } else { 1879 oldm = NULL; 1880 } 1881 1882 ret = 0; 1883 m = NULL; 1884 1885 /* 1886 * Since the object lock was dropped, another thread might have 1887 * faulted on the same GTT address and instantiated the mapping. 1888 * Recheck. 1889 */ 1890 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1891 if (m != NULL) { 1892 /* 1893 * Try to busy the page, retry on failure (non-zero ret). 1894 */ 1895 if (vm_page_busy_try(m, false)) { 1896 kprintf("i915_gem_fault: PG_BUSY\n"); 1897 ret = -EINTR; 1898 goto unlock; 1899 } 1900 goto have_page; 1901 } 1902 /* 1903 * END FREEBSD MAGIC 1904 */ 1905 1906 obj->fault_mappable = true; 1907 1908 /* Finally, remap it using the new GTT offset */ 1909 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1910 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1911 if (m == NULL) { 1912 ret = -EFAULT; 1913 goto unpin; 1914 } 1915 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1916 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1917 1918 /* 1919 * Try to busy the page. Fails on non-zero return. 1920 */ 1921 if (vm_page_busy_try(m, false)) { 1922 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1923 ret = -EINTR; 1924 goto unpin; 1925 } 1926 m->valid = VM_PAGE_BITS_ALL; 1927 1928 #if 0 1929 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1930 /* Overriding existing pages in partial view does not cause 1931 * us any trouble as TLBs are still valid because the fault 1932 * is due to userspace losing part of the mapping or never 1933 * having accessed it before (at this partials' range). 1934 */ 1935 unsigned long base = vma->vm_start + 1936 (view.params.partial.offset << PAGE_SHIFT); 1937 unsigned int i; 1938 1939 for (i = 0; i < view.params.partial.size; i++) { 1940 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1941 if (ret) 1942 break; 1943 } 1944 1945 obj->fault_mappable = true; 1946 } else { 1947 if (!obj->fault_mappable) { 1948 unsigned long size = min_t(unsigned long, 1949 vma->vm_end - vma->vm_start, 1950 obj->base.size); 1951 int i; 1952 1953 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1954 ret = vm_insert_pfn(vma, 1955 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1956 pfn + i); 1957 if (ret) 1958 break; 1959 } 1960 1961 obj->fault_mappable = true; 1962 } else 1963 ret = vm_insert_pfn(vma, 1964 (unsigned long)vmf->virtual_address, 1965 pfn + page_offset); 1966 #endif 1967 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1968 #if 0 1969 } 1970 #endif 1971 1972 have_page: 1973 *mres = m; 1974 1975 i915_gem_object_ggtt_unpin_view(obj, &view); 1976 mutex_unlock(&dev->struct_mutex); 1977 ret = VM_PAGER_OK; 1978 goto done; 1979 1980 /* 1981 * ALTERNATIVE ERROR RETURN. 1982 * 1983 * OBJECT EXPECTED TO BE LOCKED. 1984 */ 1985 unpin: 1986 i915_gem_object_ggtt_unpin_view(obj, &view); 1987 unlock: 1988 mutex_unlock(&dev->struct_mutex); 1989 out: 1990 switch (ret) { 1991 case -EIO: 1992 /* 1993 * We eat errors when the gpu is terminally wedged to avoid 1994 * userspace unduly crashing (gl has no provisions for mmaps to 1995 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1996 * and so needs to be reported. 1997 */ 1998 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1999 // ret = VM_FAULT_SIGBUS; 2000 break; 2001 } 2002 case -EAGAIN: 2003 /* 2004 * EAGAIN means the gpu is hung and we'll wait for the error 2005 * handler to reset everything when re-faulting in 2006 * i915_mutex_lock_interruptible. 2007 */ 2008 case -ERESTARTSYS: 2009 case -EINTR: 2010 VM_OBJECT_UNLOCK(vm_obj); 2011 int dummy; 2012 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2013 VM_OBJECT_LOCK(vm_obj); 2014 goto retry; 2015 default: 2016 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2017 ret = VM_PAGER_ERROR; 2018 break; 2019 } 2020 2021 done: 2022 if (oldm != NULL) 2023 vm_page_free(oldm); 2024 vm_object_pip_wakeup(vm_obj); 2025 2026 intel_runtime_pm_put(dev_priv); 2027 return ret; 2028 } 2029 2030 /** 2031 * i915_gem_release_mmap - remove physical page mappings 2032 * @obj: obj in question 2033 * 2034 * Preserve the reservation of the mmapping with the DRM core code, but 2035 * relinquish ownership of the pages back to the system. 2036 * 2037 * It is vital that we remove the page mapping if we have mapped a tiled 2038 * object through the GTT and then lose the fence register due to 2039 * resource pressure. Similarly if the object has been moved out of the 2040 * aperture, than pages mapped into userspace must be revoked. Removing the 2041 * mapping will then trigger a page fault on the next user access, allowing 2042 * fixup by i915_gem_fault(). 2043 */ 2044 void 2045 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2046 { 2047 vm_object_t devobj; 2048 vm_page_t m; 2049 int i, page_count; 2050 2051 /* Serialisation between user GTT access and our code depends upon 2052 * revoking the CPU's PTE whilst the mutex is held. The next user 2053 * pagefault then has to wait until we release the mutex. 2054 */ 2055 lockdep_assert_held(&obj->base.dev->struct_mutex); 2056 2057 if (!obj->fault_mappable) 2058 return; 2059 2060 devobj = cdev_pager_lookup(obj); 2061 if (devobj != NULL) { 2062 page_count = OFF_TO_IDX(obj->base.size); 2063 2064 VM_OBJECT_LOCK(devobj); 2065 for (i = 0; i < page_count; i++) { 2066 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2067 if (m == NULL) 2068 continue; 2069 cdev_pager_free_page(devobj, m); 2070 } 2071 VM_OBJECT_UNLOCK(devobj); 2072 vm_object_deallocate(devobj); 2073 } 2074 2075 /* Ensure that the CPU's PTE are revoked and there are not outstanding 2076 * memory transactions from userspace before we return. The TLB 2077 * flushing implied above by changing the PTE above *should* be 2078 * sufficient, an extra barrier here just provides us with a bit 2079 * of paranoid documentation about our requirement to serialise 2080 * memory writes before touching registers / GSM. 2081 */ 2082 wmb(); 2083 2084 obj->fault_mappable = false; 2085 } 2086 2087 void 2088 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2089 { 2090 struct drm_i915_gem_object *obj; 2091 2092 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2093 i915_gem_release_mmap(obj); 2094 } 2095 2096 uint32_t 2097 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2098 { 2099 uint32_t gtt_size; 2100 2101 if (INTEL_INFO(dev)->gen >= 4 || 2102 tiling_mode == I915_TILING_NONE) 2103 return size; 2104 2105 /* Previous chips need a power-of-two fence region when tiling */ 2106 if (INTEL_INFO(dev)->gen == 3) 2107 gtt_size = 1024*1024; 2108 else 2109 gtt_size = 512*1024; 2110 2111 while (gtt_size < size) 2112 gtt_size <<= 1; 2113 2114 return gtt_size; 2115 } 2116 2117 /** 2118 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2119 * @obj: object to check 2120 * 2121 * Return the required GTT alignment for an object, taking into account 2122 * potential fence register mapping. 2123 */ 2124 uint32_t 2125 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2126 int tiling_mode, bool fenced) 2127 { 2128 /* 2129 * Minimum alignment is 4k (GTT page size), but might be greater 2130 * if a fence register is needed for the object. 2131 */ 2132 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2133 tiling_mode == I915_TILING_NONE) 2134 return 4096; 2135 2136 /* 2137 * Previous chips need to be aligned to the size of the smallest 2138 * fence register that can contain the object. 2139 */ 2140 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2141 } 2142 2143 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2144 { 2145 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2146 int ret; 2147 2148 #if 0 2149 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2150 return 0; 2151 #endif 2152 2153 dev_priv->mm.shrinker_no_lock_stealing = true; 2154 2155 ret = drm_gem_create_mmap_offset(&obj->base); 2156 if (ret != -ENOSPC) 2157 goto out; 2158 2159 /* Badly fragmented mmap space? The only way we can recover 2160 * space is by destroying unwanted objects. We can't randomly release 2161 * mmap_offsets as userspace expects them to be persistent for the 2162 * lifetime of the objects. The closest we can is to release the 2163 * offsets on purgeable objects by truncating it and marking it purged, 2164 * which prevents userspace from ever using that object again. 2165 */ 2166 i915_gem_shrink(dev_priv, 2167 obj->base.size >> PAGE_SHIFT, 2168 I915_SHRINK_BOUND | 2169 I915_SHRINK_UNBOUND | 2170 I915_SHRINK_PURGEABLE); 2171 ret = drm_gem_create_mmap_offset(&obj->base); 2172 if (ret != -ENOSPC) 2173 goto out; 2174 2175 i915_gem_shrink_all(dev_priv); 2176 ret = drm_gem_create_mmap_offset(&obj->base); 2177 out: 2178 dev_priv->mm.shrinker_no_lock_stealing = false; 2179 2180 return ret; 2181 } 2182 2183 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2184 { 2185 drm_gem_free_mmap_offset(&obj->base); 2186 } 2187 2188 int 2189 i915_gem_mmap_gtt(struct drm_file *file, 2190 struct drm_device *dev, 2191 uint32_t handle, 2192 uint64_t *offset) 2193 { 2194 struct drm_i915_gem_object *obj; 2195 int ret; 2196 2197 ret = i915_mutex_lock_interruptible(dev); 2198 if (ret) 2199 return ret; 2200 2201 obj = to_intel_bo(drm_gem_object_lookup(file, handle)); 2202 if (&obj->base == NULL) { 2203 ret = -ENOENT; 2204 goto unlock; 2205 } 2206 2207 if (obj->madv != I915_MADV_WILLNEED) { 2208 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2209 ret = -EFAULT; 2210 goto out; 2211 } 2212 2213 ret = i915_gem_object_create_mmap_offset(obj); 2214 if (ret) 2215 goto out; 2216 2217 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2218 DRM_GEM_MAPPING_KEY; 2219 2220 out: 2221 drm_gem_object_unreference(&obj->base); 2222 unlock: 2223 mutex_unlock(&dev->struct_mutex); 2224 return ret; 2225 } 2226 2227 /** 2228 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2229 * @dev: DRM device 2230 * @data: GTT mapping ioctl data 2231 * @file: GEM object info 2232 * 2233 * Simply returns the fake offset to userspace so it can mmap it. 2234 * The mmap call will end up in drm_gem_mmap(), which will set things 2235 * up so we can get faults in the handler above. 2236 * 2237 * The fault handler will take care of binding the object into the GTT 2238 * (since it may have been evicted to make room for something), allocating 2239 * a fence register, and mapping the appropriate aperture address into 2240 * userspace. 2241 */ 2242 int 2243 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2244 struct drm_file *file) 2245 { 2246 struct drm_i915_gem_mmap_gtt *args = data; 2247 2248 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2249 } 2250 2251 /* Immediately discard the backing storage */ 2252 static void 2253 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2254 { 2255 vm_object_t vm_obj; 2256 2257 vm_obj = obj->base.vm_obj; 2258 VM_OBJECT_LOCK(vm_obj); 2259 vm_object_page_remove(vm_obj, 0, 0, false); 2260 VM_OBJECT_UNLOCK(vm_obj); 2261 2262 obj->madv = __I915_MADV_PURGED; 2263 } 2264 2265 /* Try to discard unwanted pages */ 2266 static void 2267 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2268 { 2269 #if 0 2270 struct address_space *mapping; 2271 #endif 2272 2273 switch (obj->madv) { 2274 case I915_MADV_DONTNEED: 2275 i915_gem_object_truncate(obj); 2276 case __I915_MADV_PURGED: 2277 return; 2278 } 2279 2280 #if 0 2281 if (obj->base.filp == NULL) 2282 return; 2283 2284 mapping = file_inode(obj->base.filp)->i_mapping, 2285 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2286 #endif 2287 } 2288 2289 static void 2290 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2291 { 2292 struct sg_page_iter sg_iter; 2293 int ret; 2294 2295 BUG_ON(obj->madv == __I915_MADV_PURGED); 2296 2297 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2298 if (WARN_ON(ret)) { 2299 /* In the event of a disaster, abandon all caches and 2300 * hope for the best. 2301 */ 2302 i915_gem_clflush_object(obj, true); 2303 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2304 } 2305 2306 i915_gem_gtt_finish_object(obj); 2307 2308 if (i915_gem_object_needs_bit17_swizzle(obj)) 2309 i915_gem_object_save_bit_17_swizzle(obj); 2310 2311 if (obj->madv == I915_MADV_DONTNEED) 2312 obj->dirty = 0; 2313 2314 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2315 struct vm_page *page = sg_page_iter_page(&sg_iter); 2316 2317 if (obj->dirty) 2318 set_page_dirty(page); 2319 2320 if (obj->madv == I915_MADV_WILLNEED) 2321 mark_page_accessed(page); 2322 2323 vm_page_busy_wait(page, FALSE, "i915gem"); 2324 vm_page_unwire(page, 1); 2325 vm_page_wakeup(page); 2326 } 2327 obj->dirty = 0; 2328 2329 sg_free_table(obj->pages); 2330 kfree(obj->pages); 2331 } 2332 2333 int 2334 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2335 { 2336 const struct drm_i915_gem_object_ops *ops = obj->ops; 2337 2338 if (obj->pages == NULL) 2339 return 0; 2340 2341 if (obj->pages_pin_count) 2342 return -EBUSY; 2343 2344 BUG_ON(i915_gem_obj_bound_any(obj)); 2345 2346 /* ->put_pages might need to allocate memory for the bit17 swizzle 2347 * array, hence protect them from being reaped by removing them from gtt 2348 * lists early. */ 2349 list_del(&obj->global_list); 2350 2351 if (obj->mapping) { 2352 if (is_vmalloc_addr(obj->mapping)) 2353 vunmap(obj->mapping); 2354 else 2355 kunmap(kmap_to_page(obj->mapping)); 2356 obj->mapping = NULL; 2357 } 2358 2359 ops->put_pages(obj); 2360 obj->pages = NULL; 2361 2362 i915_gem_object_invalidate(obj); 2363 2364 return 0; 2365 } 2366 2367 static int 2368 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2369 { 2370 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2371 int page_count, i; 2372 vm_object_t vm_obj; 2373 struct sg_table *st; 2374 struct scatterlist *sg; 2375 struct sg_page_iter sg_iter; 2376 struct vm_page *page; 2377 unsigned long last_pfn = 0; /* suppress gcc warning */ 2378 int ret; 2379 2380 /* Assert that the object is not currently in any GPU domain. As it 2381 * wasn't in the GTT, there shouldn't be any way it could have been in 2382 * a GPU cache 2383 */ 2384 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2385 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2386 2387 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2388 if (st == NULL) 2389 return -ENOMEM; 2390 2391 page_count = obj->base.size / PAGE_SIZE; 2392 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2393 kfree(st); 2394 return -ENOMEM; 2395 } 2396 2397 /* Get the list of pages out of our struct file. They'll be pinned 2398 * at this point until we release them. 2399 * 2400 * Fail silently without starting the shrinker 2401 */ 2402 vm_obj = obj->base.vm_obj; 2403 VM_OBJECT_LOCK(vm_obj); 2404 sg = st->sgl; 2405 st->nents = 0; 2406 for (i = 0; i < page_count; i++) { 2407 page = shmem_read_mapping_page(vm_obj, i); 2408 if (IS_ERR(page)) { 2409 i915_gem_shrink(dev_priv, 2410 page_count, 2411 I915_SHRINK_BOUND | 2412 I915_SHRINK_UNBOUND | 2413 I915_SHRINK_PURGEABLE); 2414 page = shmem_read_mapping_page(vm_obj, i); 2415 } 2416 if (IS_ERR(page)) { 2417 /* We've tried hard to allocate the memory by reaping 2418 * our own buffer, now let the real VM do its job and 2419 * go down in flames if truly OOM. 2420 */ 2421 i915_gem_shrink_all(dev_priv); 2422 page = shmem_read_mapping_page(vm_obj, i); 2423 if (IS_ERR(page)) { 2424 ret = PTR_ERR(page); 2425 goto err_pages; 2426 } 2427 } 2428 #ifdef CONFIG_SWIOTLB 2429 if (swiotlb_nr_tbl()) { 2430 st->nents++; 2431 sg_set_page(sg, page, PAGE_SIZE, 0); 2432 sg = sg_next(sg); 2433 continue; 2434 } 2435 #endif 2436 if (!i || page_to_pfn(page) != last_pfn + 1) { 2437 if (i) 2438 sg = sg_next(sg); 2439 st->nents++; 2440 sg_set_page(sg, page, PAGE_SIZE, 0); 2441 } else { 2442 sg->length += PAGE_SIZE; 2443 } 2444 last_pfn = page_to_pfn(page); 2445 2446 /* Check that the i965g/gm workaround works. */ 2447 } 2448 #ifdef CONFIG_SWIOTLB 2449 if (!swiotlb_nr_tbl()) 2450 #endif 2451 sg_mark_end(sg); 2452 obj->pages = st; 2453 VM_OBJECT_UNLOCK(vm_obj); 2454 2455 ret = i915_gem_gtt_prepare_object(obj); 2456 if (ret) 2457 goto err_pages; 2458 2459 if (i915_gem_object_needs_bit17_swizzle(obj)) 2460 i915_gem_object_do_bit_17_swizzle(obj); 2461 2462 if (obj->tiling_mode != I915_TILING_NONE && 2463 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2464 i915_gem_object_pin_pages(obj); 2465 2466 return 0; 2467 2468 err_pages: 2469 sg_mark_end(sg); 2470 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2471 page = sg_page_iter_page(&sg_iter); 2472 vm_page_busy_wait(page, FALSE, "i915gem"); 2473 vm_page_unwire(page, 0); 2474 vm_page_wakeup(page); 2475 } 2476 VM_OBJECT_UNLOCK(vm_obj); 2477 sg_free_table(st); 2478 kfree(st); 2479 2480 /* shmemfs first checks if there is enough memory to allocate the page 2481 * and reports ENOSPC should there be insufficient, along with the usual 2482 * ENOMEM for a genuine allocation failure. 2483 * 2484 * We use ENOSPC in our driver to mean that we have run out of aperture 2485 * space and so want to translate the error from shmemfs back to our 2486 * usual understanding of ENOMEM. 2487 */ 2488 if (ret == -ENOSPC) 2489 ret = -ENOMEM; 2490 2491 return ret; 2492 } 2493 2494 /* Ensure that the associated pages are gathered from the backing storage 2495 * and pinned into our object. i915_gem_object_get_pages() may be called 2496 * multiple times before they are released by a single call to 2497 * i915_gem_object_put_pages() - once the pages are no longer referenced 2498 * either as a result of memory pressure (reaping pages under the shrinker) 2499 * or as the object is itself released. 2500 */ 2501 int 2502 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2503 { 2504 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2505 const struct drm_i915_gem_object_ops *ops = obj->ops; 2506 int ret; 2507 2508 if (obj->pages) 2509 return 0; 2510 2511 if (obj->madv != I915_MADV_WILLNEED) { 2512 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2513 return -EFAULT; 2514 } 2515 2516 BUG_ON(obj->pages_pin_count); 2517 2518 ret = ops->get_pages(obj); 2519 if (ret) 2520 return ret; 2521 2522 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2523 2524 obj->get_page.sg = obj->pages->sgl; 2525 obj->get_page.last = 0; 2526 2527 return 0; 2528 } 2529 2530 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2531 { 2532 int ret; 2533 2534 lockdep_assert_held(&obj->base.dev->struct_mutex); 2535 2536 ret = i915_gem_object_get_pages(obj); 2537 if (ret) 2538 return ERR_PTR(ret); 2539 2540 i915_gem_object_pin_pages(obj); 2541 2542 if (obj->mapping == NULL) { 2543 struct vm_page **pages; 2544 2545 pages = NULL; 2546 if (obj->base.size == PAGE_SIZE) 2547 obj->mapping = kmap(sg_page(obj->pages->sgl)); 2548 else 2549 pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, 2550 sizeof(*pages), 2551 GFP_TEMPORARY); 2552 if (pages != NULL) { 2553 struct sg_page_iter sg_iter; 2554 int n; 2555 2556 n = 0; 2557 for_each_sg_page(obj->pages->sgl, &sg_iter, 2558 obj->pages->nents, 0) 2559 pages[n++] = sg_page_iter_page(&sg_iter); 2560 2561 obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); 2562 drm_free_large(pages); 2563 } 2564 if (obj->mapping == NULL) { 2565 i915_gem_object_unpin_pages(obj); 2566 return ERR_PTR(-ENOMEM); 2567 } 2568 } 2569 2570 return obj->mapping; 2571 } 2572 2573 void i915_vma_move_to_active(struct i915_vma *vma, 2574 struct drm_i915_gem_request *req) 2575 { 2576 struct drm_i915_gem_object *obj = vma->obj; 2577 struct intel_engine_cs *engine; 2578 2579 engine = i915_gem_request_get_engine(req); 2580 2581 /* Add a reference if we're newly entering the active list. */ 2582 if (obj->active == 0) 2583 drm_gem_object_reference(&obj->base); 2584 obj->active |= intel_engine_flag(engine); 2585 2586 list_move_tail(&obj->engine_list[engine->id], &engine->active_list); 2587 i915_gem_request_assign(&obj->last_read_req[engine->id], req); 2588 2589 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2590 } 2591 2592 static void 2593 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2594 { 2595 GEM_BUG_ON(obj->last_write_req == NULL); 2596 GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); 2597 2598 i915_gem_request_assign(&obj->last_write_req, NULL); 2599 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2600 } 2601 2602 static void 2603 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2604 { 2605 struct i915_vma *vma; 2606 2607 GEM_BUG_ON(obj->last_read_req[ring] == NULL); 2608 GEM_BUG_ON(!(obj->active & (1 << ring))); 2609 2610 list_del_init(&obj->engine_list[ring]); 2611 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2612 2613 if (obj->last_write_req && obj->last_write_req->engine->id == ring) 2614 i915_gem_object_retire__write(obj); 2615 2616 obj->active &= ~(1 << ring); 2617 if (obj->active) 2618 return; 2619 2620 /* Bump our place on the bound list to keep it roughly in LRU order 2621 * so that we don't steal from recently used but inactive objects 2622 * (unless we are forced to ofc!) 2623 */ 2624 list_move_tail(&obj->global_list, 2625 &to_i915(obj->base.dev)->mm.bound_list); 2626 2627 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2628 if (!list_empty(&vma->vm_link)) 2629 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2630 } 2631 2632 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2633 drm_gem_object_unreference(&obj->base); 2634 } 2635 2636 static int 2637 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2638 { 2639 struct drm_i915_private *dev_priv = dev->dev_private; 2640 struct intel_engine_cs *engine; 2641 int ret; 2642 2643 /* Carefully retire all requests without writing to the rings */ 2644 for_each_engine(engine, dev_priv) { 2645 ret = intel_engine_idle(engine); 2646 if (ret) 2647 return ret; 2648 } 2649 i915_gem_retire_requests(dev); 2650 2651 /* Finally reset hw state */ 2652 for_each_engine(engine, dev_priv) 2653 intel_ring_init_seqno(engine, seqno); 2654 2655 return 0; 2656 } 2657 2658 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2659 { 2660 struct drm_i915_private *dev_priv = dev->dev_private; 2661 int ret; 2662 2663 if (seqno == 0) 2664 return -EINVAL; 2665 2666 /* HWS page needs to be set less than what we 2667 * will inject to ring 2668 */ 2669 ret = i915_gem_init_seqno(dev, seqno - 1); 2670 if (ret) 2671 return ret; 2672 2673 /* Carefully set the last_seqno value so that wrap 2674 * detection still works 2675 */ 2676 dev_priv->next_seqno = seqno; 2677 dev_priv->last_seqno = seqno - 1; 2678 if (dev_priv->last_seqno == 0) 2679 dev_priv->last_seqno--; 2680 2681 return 0; 2682 } 2683 2684 int 2685 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2686 { 2687 struct drm_i915_private *dev_priv = dev->dev_private; 2688 2689 /* reserve 0 for non-seqno */ 2690 if (dev_priv->next_seqno == 0) { 2691 int ret = i915_gem_init_seqno(dev, 0); 2692 if (ret) 2693 return ret; 2694 2695 dev_priv->next_seqno = 1; 2696 } 2697 2698 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2699 return 0; 2700 } 2701 2702 /* 2703 * NB: This function is not allowed to fail. Doing so would mean the the 2704 * request is not being tracked for completion but the work itself is 2705 * going to happen on the hardware. This would be a Bad Thing(tm). 2706 */ 2707 void __i915_add_request(struct drm_i915_gem_request *request, 2708 struct drm_i915_gem_object *obj, 2709 bool flush_caches) 2710 { 2711 struct intel_engine_cs *engine; 2712 struct drm_i915_private *dev_priv; 2713 struct intel_ringbuffer *ringbuf; 2714 u32 request_start; 2715 int ret; 2716 2717 if (WARN_ON(request == NULL)) 2718 return; 2719 2720 engine = request->engine; 2721 dev_priv = request->i915; 2722 ringbuf = request->ringbuf; 2723 2724 /* 2725 * To ensure that this call will not fail, space for its emissions 2726 * should already have been reserved in the ring buffer. Let the ring 2727 * know that it is time to use that space up. 2728 */ 2729 intel_ring_reserved_space_use(ringbuf); 2730 2731 request_start = intel_ring_get_tail(ringbuf); 2732 /* 2733 * Emit any outstanding flushes - execbuf can fail to emit the flush 2734 * after having emitted the batchbuffer command. Hence we need to fix 2735 * things up similar to emitting the lazy request. The difference here 2736 * is that the flush _must_ happen before the next request, no matter 2737 * what. 2738 */ 2739 if (flush_caches) { 2740 if (i915.enable_execlists) 2741 ret = logical_ring_flush_all_caches(request); 2742 else 2743 ret = intel_ring_flush_all_caches(request); 2744 /* Not allowed to fail! */ 2745 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2746 } 2747 2748 trace_i915_gem_request_add(request); 2749 2750 request->head = request_start; 2751 2752 /* Whilst this request exists, batch_obj will be on the 2753 * active_list, and so will hold the active reference. Only when this 2754 * request is retired will the the batch_obj be moved onto the 2755 * inactive_list and lose its active reference. Hence we do not need 2756 * to explicitly hold another reference here. 2757 */ 2758 request->batch_obj = obj; 2759 2760 /* Seal the request and mark it as pending execution. Note that 2761 * we may inspect this state, without holding any locks, during 2762 * hangcheck. Hence we apply the barrier to ensure that we do not 2763 * see a more recent value in the hws than we are tracking. 2764 */ 2765 request->emitted_jiffies = jiffies; 2766 request->previous_seqno = engine->last_submitted_seqno; 2767 smp_store_mb(engine->last_submitted_seqno, request->seqno); 2768 list_add_tail(&request->list, &engine->request_list); 2769 2770 /* Record the position of the start of the request so that 2771 * should we detect the updated seqno part-way through the 2772 * GPU processing the request, we never over-estimate the 2773 * position of the head. 2774 */ 2775 request->postfix = intel_ring_get_tail(ringbuf); 2776 2777 if (i915.enable_execlists) 2778 ret = engine->emit_request(request); 2779 else { 2780 ret = engine->add_request(request); 2781 2782 request->tail = intel_ring_get_tail(ringbuf); 2783 } 2784 2785 /* Not allowed to fail! */ 2786 WARN(ret, "emit|add_request failed: %d!\n", ret); 2787 2788 i915_queue_hangcheck(engine->dev); 2789 2790 queue_delayed_work(dev_priv->wq, 2791 &dev_priv->mm.retire_work, 2792 round_jiffies_up_relative(HZ)); 2793 intel_mark_busy(dev_priv->dev); 2794 2795 /* Sanity check that the reserved size was large enough. */ 2796 intel_ring_reserved_space_end(ringbuf); 2797 } 2798 2799 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2800 const struct intel_context *ctx) 2801 { 2802 unsigned long elapsed; 2803 2804 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2805 2806 if (ctx->hang_stats.banned) 2807 return true; 2808 2809 if (ctx->hang_stats.ban_period_seconds && 2810 elapsed <= ctx->hang_stats.ban_period_seconds) { 2811 if (!i915_gem_context_is_default(ctx)) { 2812 DRM_DEBUG("context hanging too fast, banning!\n"); 2813 return true; 2814 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2815 if (i915_stop_ring_allow_warn(dev_priv)) 2816 DRM_ERROR("gpu hanging too fast, banning!\n"); 2817 return true; 2818 } 2819 } 2820 2821 return false; 2822 } 2823 2824 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2825 struct intel_context *ctx, 2826 const bool guilty) 2827 { 2828 struct i915_ctx_hang_stats *hs; 2829 2830 if (WARN_ON(!ctx)) 2831 return; 2832 2833 hs = &ctx->hang_stats; 2834 2835 if (guilty) { 2836 hs->banned = i915_context_is_banned(dev_priv, ctx); 2837 hs->batch_active++; 2838 hs->guilty_ts = get_seconds(); 2839 } else { 2840 hs->batch_pending++; 2841 } 2842 } 2843 2844 void i915_gem_request_free(struct kref *req_ref) 2845 { 2846 struct drm_i915_gem_request *req = container_of(req_ref, 2847 typeof(*req), ref); 2848 struct intel_context *ctx = req->ctx; 2849 2850 if (req->file_priv) 2851 i915_gem_request_remove_from_client(req); 2852 2853 if (ctx) { 2854 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2855 intel_lr_context_unpin(ctx, req->engine); 2856 2857 i915_gem_context_unreference(ctx); 2858 } 2859 2860 kfree(req); 2861 } 2862 2863 static inline int 2864 __i915_gem_request_alloc(struct intel_engine_cs *engine, 2865 struct intel_context *ctx, 2866 struct drm_i915_gem_request **req_out) 2867 { 2868 struct drm_i915_private *dev_priv = to_i915(engine->dev); 2869 unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); 2870 struct drm_i915_gem_request *req; 2871 int ret; 2872 2873 if (!req_out) 2874 return -EINVAL; 2875 2876 *req_out = NULL; 2877 2878 /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report 2879 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex 2880 * and restart. 2881 */ 2882 ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); 2883 if (ret) 2884 return ret; 2885 2886 req = kzalloc(sizeof(*req), GFP_KERNEL); 2887 if (req == NULL) 2888 return -ENOMEM; 2889 2890 ret = i915_gem_get_seqno(engine->dev, &req->seqno); 2891 if (ret) 2892 goto err; 2893 2894 kref_init(&req->ref); 2895 req->i915 = dev_priv; 2896 req->engine = engine; 2897 req->reset_counter = reset_counter; 2898 req->ctx = ctx; 2899 i915_gem_context_reference(req->ctx); 2900 2901 if (i915.enable_execlists) 2902 ret = intel_logical_ring_alloc_request_extras(req); 2903 else 2904 ret = intel_ring_alloc_request_extras(req); 2905 if (ret) { 2906 i915_gem_context_unreference(req->ctx); 2907 goto err; 2908 } 2909 2910 /* 2911 * Reserve space in the ring buffer for all the commands required to 2912 * eventually emit this request. This is to guarantee that the 2913 * i915_add_request() call can't fail. Note that the reserve may need 2914 * to be redone if the request is not actually submitted straight 2915 * away, e.g. because a GPU scheduler has deferred it. 2916 */ 2917 if (i915.enable_execlists) 2918 ret = intel_logical_ring_reserve_space(req); 2919 else 2920 ret = intel_ring_reserve_space(req); 2921 if (ret) { 2922 /* 2923 * At this point, the request is fully allocated even if not 2924 * fully prepared. Thus it can be cleaned up using the proper 2925 * free code. 2926 */ 2927 intel_ring_reserved_space_cancel(req->ringbuf); 2928 i915_gem_request_unreference(req); 2929 return ret; 2930 } 2931 2932 *req_out = req; 2933 return 0; 2934 2935 err: 2936 kfree(req); 2937 return ret; 2938 } 2939 2940 /** 2941 * i915_gem_request_alloc - allocate a request structure 2942 * 2943 * @engine: engine that we wish to issue the request on. 2944 * @ctx: context that the request will be associated with. 2945 * This can be NULL if the request is not directly related to 2946 * any specific user context, in which case this function will 2947 * choose an appropriate context to use. 2948 * 2949 * Returns a pointer to the allocated request if successful, 2950 * or an error code if not. 2951 */ 2952 struct drm_i915_gem_request * 2953 i915_gem_request_alloc(struct intel_engine_cs *engine, 2954 struct intel_context *ctx) 2955 { 2956 struct drm_i915_gem_request *req; 2957 int err; 2958 2959 if (ctx == NULL) 2960 ctx = to_i915(engine->dev)->kernel_context; 2961 err = __i915_gem_request_alloc(engine, ctx, &req); 2962 return err ? ERR_PTR(err) : req; 2963 } 2964 2965 struct drm_i915_gem_request * 2966 i915_gem_find_active_request(struct intel_engine_cs *engine) 2967 { 2968 struct drm_i915_gem_request *request; 2969 2970 list_for_each_entry(request, &engine->request_list, list) { 2971 if (i915_gem_request_completed(request, false)) 2972 continue; 2973 2974 return request; 2975 } 2976 2977 return NULL; 2978 } 2979 2980 static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv, 2981 struct intel_engine_cs *engine) 2982 { 2983 struct drm_i915_gem_request *request; 2984 bool ring_hung; 2985 2986 request = i915_gem_find_active_request(engine); 2987 2988 if (request == NULL) 2989 return; 2990 2991 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2992 2993 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2994 2995 list_for_each_entry_continue(request, &engine->request_list, list) 2996 i915_set_reset_status(dev_priv, request->ctx, false); 2997 } 2998 2999 static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv, 3000 struct intel_engine_cs *engine) 3001 { 3002 struct intel_ringbuffer *buffer; 3003 3004 while (!list_empty(&engine->active_list)) { 3005 struct drm_i915_gem_object *obj; 3006 3007 obj = list_first_entry(&engine->active_list, 3008 struct drm_i915_gem_object, 3009 engine_list[engine->id]); 3010 3011 i915_gem_object_retire__read(obj, engine->id); 3012 } 3013 3014 /* 3015 * Clear the execlists queue up before freeing the requests, as those 3016 * are the ones that keep the context and ringbuffer backing objects 3017 * pinned in place. 3018 */ 3019 3020 if (i915.enable_execlists) { 3021 /* Ensure irq handler finishes or is cancelled. */ 3022 tasklet_kill(&engine->irq_tasklet); 3023 3024 spin_lock_bh(&engine->execlist_lock); 3025 /* list_splice_tail_init checks for empty lists */ 3026 list_splice_tail_init(&engine->execlist_queue, 3027 &engine->execlist_retired_req_list); 3028 spin_unlock_bh(&engine->execlist_lock); 3029 3030 intel_execlists_retire_requests(engine); 3031 } 3032 3033 /* 3034 * We must free the requests after all the corresponding objects have 3035 * been moved off active lists. Which is the same order as the normal 3036 * retire_requests function does. This is important if object hold 3037 * implicit references on things like e.g. ppgtt address spaces through 3038 * the request. 3039 */ 3040 while (!list_empty(&engine->request_list)) { 3041 struct drm_i915_gem_request *request; 3042 3043 request = list_first_entry(&engine->request_list, 3044 struct drm_i915_gem_request, 3045 list); 3046 3047 i915_gem_request_retire(request); 3048 } 3049 3050 /* Having flushed all requests from all queues, we know that all 3051 * ringbuffers must now be empty. However, since we do not reclaim 3052 * all space when retiring the request (to prevent HEADs colliding 3053 * with rapid ringbuffer wraparound) the amount of available space 3054 * upon reset is less than when we start. Do one more pass over 3055 * all the ringbuffers to reset last_retired_head. 3056 */ 3057 list_for_each_entry(buffer, &engine->buffers, link) { 3058 buffer->last_retired_head = buffer->tail; 3059 intel_ring_update_space(buffer); 3060 } 3061 3062 intel_ring_init_seqno(engine, engine->last_submitted_seqno); 3063 } 3064 3065 void i915_gem_reset(struct drm_device *dev) 3066 { 3067 struct drm_i915_private *dev_priv = dev->dev_private; 3068 struct intel_engine_cs *engine; 3069 3070 /* 3071 * Before we free the objects from the requests, we need to inspect 3072 * them for finding the guilty party. As the requests only borrow 3073 * their reference to the objects, the inspection must be done first. 3074 */ 3075 for_each_engine(engine, dev_priv) 3076 i915_gem_reset_engine_status(dev_priv, engine); 3077 3078 for_each_engine(engine, dev_priv) 3079 i915_gem_reset_engine_cleanup(dev_priv, engine); 3080 3081 i915_gem_context_reset(dev); 3082 3083 i915_gem_restore_fences(dev); 3084 3085 WARN_ON(i915_verify_lists(dev)); 3086 } 3087 3088 /** 3089 * This function clears the request list as sequence numbers are passed. 3090 */ 3091 void 3092 i915_gem_retire_requests_ring(struct intel_engine_cs *engine) 3093 { 3094 WARN_ON(i915_verify_lists(engine->dev)); 3095 3096 /* Retire requests first as we use it above for the early return. 3097 * If we retire requests last, we may use a later seqno and so clear 3098 * the requests lists without clearing the active list, leading to 3099 * confusion. 3100 */ 3101 while (!list_empty(&engine->request_list)) { 3102 struct drm_i915_gem_request *request; 3103 3104 request = list_first_entry(&engine->request_list, 3105 struct drm_i915_gem_request, 3106 list); 3107 3108 if (!i915_gem_request_completed(request, true)) 3109 break; 3110 3111 i915_gem_request_retire(request); 3112 } 3113 3114 /* Move any buffers on the active list that are no longer referenced 3115 * by the ringbuffer to the flushing/inactive lists as appropriate, 3116 * before we free the context associated with the requests. 3117 */ 3118 while (!list_empty(&engine->active_list)) { 3119 struct drm_i915_gem_object *obj; 3120 3121 obj = list_first_entry(&engine->active_list, 3122 struct drm_i915_gem_object, 3123 engine_list[engine->id]); 3124 3125 if (!list_empty(&obj->last_read_req[engine->id]->list)) 3126 break; 3127 3128 i915_gem_object_retire__read(obj, engine->id); 3129 } 3130 3131 if (unlikely(engine->trace_irq_req && 3132 i915_gem_request_completed(engine->trace_irq_req, true))) { 3133 engine->irq_put(engine); 3134 i915_gem_request_assign(&engine->trace_irq_req, NULL); 3135 } 3136 3137 WARN_ON(i915_verify_lists(engine->dev)); 3138 } 3139 3140 bool 3141 i915_gem_retire_requests(struct drm_device *dev) 3142 { 3143 struct drm_i915_private *dev_priv = dev->dev_private; 3144 struct intel_engine_cs *engine; 3145 bool idle = true; 3146 3147 for_each_engine(engine, dev_priv) { 3148 i915_gem_retire_requests_ring(engine); 3149 idle &= list_empty(&engine->request_list); 3150 if (i915.enable_execlists) { 3151 spin_lock_bh(&engine->execlist_lock); 3152 idle &= list_empty(&engine->execlist_queue); 3153 spin_unlock_bh(&engine->execlist_lock); 3154 3155 intel_execlists_retire_requests(engine); 3156 } 3157 } 3158 3159 if (idle) 3160 mod_delayed_work(dev_priv->wq, 3161 &dev_priv->mm.idle_work, 3162 msecs_to_jiffies(100)); 3163 3164 return idle; 3165 } 3166 3167 static void 3168 i915_gem_retire_work_handler(struct work_struct *work) 3169 { 3170 struct drm_i915_private *dev_priv = 3171 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3172 struct drm_device *dev = dev_priv->dev; 3173 bool idle; 3174 3175 /* Come back later if the device is busy... */ 3176 idle = false; 3177 if (mutex_trylock(&dev->struct_mutex)) { 3178 idle = i915_gem_retire_requests(dev); 3179 mutex_unlock(&dev->struct_mutex); 3180 } 3181 if (!idle) 3182 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3183 round_jiffies_up_relative(HZ)); 3184 } 3185 3186 static void 3187 i915_gem_idle_work_handler(struct work_struct *work) 3188 { 3189 struct drm_i915_private *dev_priv = 3190 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3191 struct drm_device *dev = dev_priv->dev; 3192 struct intel_engine_cs *engine; 3193 3194 for_each_engine(engine, dev_priv) 3195 if (!list_empty(&engine->request_list)) 3196 return; 3197 3198 /* we probably should sync with hangcheck here, using cancel_work_sync. 3199 * Also locking seems to be fubar here, engine->request_list is protected 3200 * by dev->struct_mutex. */ 3201 3202 intel_mark_idle(dev); 3203 3204 if (mutex_trylock(&dev->struct_mutex)) { 3205 for_each_engine(engine, dev_priv) 3206 i915_gem_batch_pool_fini(&engine->batch_pool); 3207 3208 mutex_unlock(&dev->struct_mutex); 3209 } 3210 } 3211 3212 /** 3213 * Ensures that an object will eventually get non-busy by flushing any required 3214 * write domains, emitting any outstanding lazy request and retiring and 3215 * completed requests. 3216 */ 3217 static int 3218 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3219 { 3220 int i; 3221 3222 if (!obj->active) 3223 return 0; 3224 3225 for (i = 0; i < I915_NUM_ENGINES; i++) { 3226 struct drm_i915_gem_request *req; 3227 3228 req = obj->last_read_req[i]; 3229 if (req == NULL) 3230 continue; 3231 3232 if (list_empty(&req->list)) 3233 goto retire; 3234 3235 if (i915_gem_request_completed(req, true)) { 3236 __i915_gem_request_retire__upto(req); 3237 retire: 3238 i915_gem_object_retire__read(obj, i); 3239 } 3240 } 3241 3242 return 0; 3243 } 3244 3245 /** 3246 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3247 * @DRM_IOCTL_ARGS: standard ioctl arguments 3248 * 3249 * Returns 0 if successful, else an error is returned with the remaining time in 3250 * the timeout parameter. 3251 * -ETIME: object is still busy after timeout 3252 * -ERESTARTSYS: signal interrupted the wait 3253 * -ENONENT: object doesn't exist 3254 * Also possible, but rare: 3255 * -EAGAIN: GPU wedged 3256 * -ENOMEM: damn 3257 * -ENODEV: Internal IRQ fail 3258 * -E?: The add request failed 3259 * 3260 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3261 * non-zero timeout parameter the wait ioctl will wait for the given number of 3262 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3263 * without holding struct_mutex the object may become re-busied before this 3264 * function completes. A similar but shorter * race condition exists in the busy 3265 * ioctl 3266 */ 3267 int 3268 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3269 { 3270 struct drm_i915_gem_wait *args = data; 3271 struct drm_i915_gem_object *obj; 3272 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3273 int i, n = 0; 3274 int ret; 3275 3276 if (args->flags != 0) 3277 return -EINVAL; 3278 3279 ret = i915_mutex_lock_interruptible(dev); 3280 if (ret) 3281 return ret; 3282 3283 obj = to_intel_bo(drm_gem_object_lookup(file, args->bo_handle)); 3284 if (&obj->base == NULL) { 3285 mutex_unlock(&dev->struct_mutex); 3286 return -ENOENT; 3287 } 3288 3289 /* Need to make sure the object gets inactive eventually. */ 3290 ret = i915_gem_object_flush_active(obj); 3291 if (ret) 3292 goto out; 3293 3294 if (!obj->active) 3295 goto out; 3296 3297 /* Do this after OLR check to make sure we make forward progress polling 3298 * on this IOCTL with a timeout == 0 (like busy ioctl) 3299 */ 3300 if (args->timeout_ns == 0) { 3301 ret = -ETIME; 3302 goto out; 3303 } 3304 3305 drm_gem_object_unreference(&obj->base); 3306 3307 for (i = 0; i < I915_NUM_ENGINES; i++) { 3308 if (obj->last_read_req[i] == NULL) 3309 continue; 3310 3311 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3312 } 3313 3314 mutex_unlock(&dev->struct_mutex); 3315 3316 for (i = 0; i < n; i++) { 3317 if (ret == 0) 3318 ret = __i915_wait_request(req[i], true, 3319 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3320 to_rps_client(file)); 3321 i915_gem_request_unreference__unlocked(req[i]); 3322 } 3323 return ret; 3324 3325 out: 3326 drm_gem_object_unreference(&obj->base); 3327 mutex_unlock(&dev->struct_mutex); 3328 return ret; 3329 } 3330 3331 static int 3332 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3333 struct intel_engine_cs *to, 3334 struct drm_i915_gem_request *from_req, 3335 struct drm_i915_gem_request **to_req) 3336 { 3337 struct intel_engine_cs *from; 3338 int ret; 3339 3340 from = i915_gem_request_get_engine(from_req); 3341 if (to == from) 3342 return 0; 3343 3344 if (i915_gem_request_completed(from_req, true)) 3345 return 0; 3346 3347 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3348 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3349 ret = __i915_wait_request(from_req, 3350 i915->mm.interruptible, 3351 NULL, 3352 &i915->rps.semaphores); 3353 if (ret) 3354 return ret; 3355 3356 i915_gem_object_retire_request(obj, from_req); 3357 } else { 3358 int idx = intel_ring_sync_index(from, to); 3359 u32 seqno = i915_gem_request_get_seqno(from_req); 3360 3361 WARN_ON(!to_req); 3362 3363 if (seqno <= from->semaphore.sync_seqno[idx]) 3364 return 0; 3365 3366 if (*to_req == NULL) { 3367 struct drm_i915_gem_request *req; 3368 3369 req = i915_gem_request_alloc(to, NULL); 3370 if (IS_ERR(req)) 3371 return PTR_ERR(req); 3372 3373 *to_req = req; 3374 } 3375 3376 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3377 ret = to->semaphore.sync_to(*to_req, from, seqno); 3378 if (ret) 3379 return ret; 3380 3381 /* We use last_read_req because sync_to() 3382 * might have just caused seqno wrap under 3383 * the radar. 3384 */ 3385 from->semaphore.sync_seqno[idx] = 3386 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3387 } 3388 3389 return 0; 3390 } 3391 3392 /** 3393 * i915_gem_object_sync - sync an object to a ring. 3394 * 3395 * @obj: object which may be in use on another ring. 3396 * @to: ring we wish to use the object on. May be NULL. 3397 * @to_req: request we wish to use the object for. See below. 3398 * This will be allocated and returned if a request is 3399 * required but not passed in. 3400 * 3401 * This code is meant to abstract object synchronization with the GPU. 3402 * Calling with NULL implies synchronizing the object with the CPU 3403 * rather than a particular GPU ring. Conceptually we serialise writes 3404 * between engines inside the GPU. We only allow one engine to write 3405 * into a buffer at any time, but multiple readers. To ensure each has 3406 * a coherent view of memory, we must: 3407 * 3408 * - If there is an outstanding write request to the object, the new 3409 * request must wait for it to complete (either CPU or in hw, requests 3410 * on the same ring will be naturally ordered). 3411 * 3412 * - If we are a write request (pending_write_domain is set), the new 3413 * request must wait for outstanding read requests to complete. 3414 * 3415 * For CPU synchronisation (NULL to) no request is required. For syncing with 3416 * rings to_req must be non-NULL. However, a request does not have to be 3417 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3418 * request will be allocated automatically and returned through *to_req. Note 3419 * that it is not guaranteed that commands will be emitted (because the system 3420 * might already be idle). Hence there is no need to create a request that 3421 * might never have any work submitted. Note further that if a request is 3422 * returned in *to_req, it is the responsibility of the caller to submit 3423 * that request (after potentially adding more work to it). 3424 * 3425 * Returns 0 if successful, else propagates up the lower layer error. 3426 */ 3427 int 3428 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3429 struct intel_engine_cs *to, 3430 struct drm_i915_gem_request **to_req) 3431 { 3432 const bool readonly = obj->base.pending_write_domain == 0; 3433 struct drm_i915_gem_request *req[I915_NUM_ENGINES]; 3434 int ret, i, n; 3435 3436 if (!obj->active) 3437 return 0; 3438 3439 if (to == NULL) 3440 return i915_gem_object_wait_rendering(obj, readonly); 3441 3442 n = 0; 3443 if (readonly) { 3444 if (obj->last_write_req) 3445 req[n++] = obj->last_write_req; 3446 } else { 3447 for (i = 0; i < I915_NUM_ENGINES; i++) 3448 if (obj->last_read_req[i]) 3449 req[n++] = obj->last_read_req[i]; 3450 } 3451 for (i = 0; i < n; i++) { 3452 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3453 if (ret) 3454 return ret; 3455 } 3456 3457 return 0; 3458 } 3459 3460 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3461 { 3462 u32 old_write_domain, old_read_domains; 3463 3464 /* Force a pagefault for domain tracking on next user access */ 3465 i915_gem_release_mmap(obj); 3466 3467 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3468 return; 3469 3470 old_read_domains = obj->base.read_domains; 3471 old_write_domain = obj->base.write_domain; 3472 3473 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3474 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3475 3476 trace_i915_gem_object_change_domain(obj, 3477 old_read_domains, 3478 old_write_domain); 3479 } 3480 3481 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3482 { 3483 struct drm_i915_gem_object *obj = vma->obj; 3484 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3485 int ret; 3486 3487 if (list_empty(&vma->obj_link)) 3488 return 0; 3489 3490 if (!drm_mm_node_allocated(&vma->node)) { 3491 i915_gem_vma_destroy(vma); 3492 return 0; 3493 } 3494 3495 if (vma->pin_count) 3496 return -EBUSY; 3497 3498 BUG_ON(obj->pages == NULL); 3499 3500 if (wait) { 3501 ret = i915_gem_object_wait_rendering(obj, false); 3502 if (ret) 3503 return ret; 3504 } 3505 3506 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3507 i915_gem_object_finish_gtt(obj); 3508 3509 /* release the fence reg _after_ flushing */ 3510 ret = i915_gem_object_put_fence(obj); 3511 if (ret) 3512 return ret; 3513 } 3514 3515 trace_i915_vma_unbind(vma); 3516 3517 vma->vm->unbind_vma(vma); 3518 vma->bound = 0; 3519 3520 list_del_init(&vma->vm_link); 3521 if (vma->is_ggtt) { 3522 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3523 obj->map_and_fenceable = false; 3524 } else if (vma->ggtt_view.pages) { 3525 sg_free_table(vma->ggtt_view.pages); 3526 kfree(vma->ggtt_view.pages); 3527 } 3528 vma->ggtt_view.pages = NULL; 3529 } 3530 3531 drm_mm_remove_node(&vma->node); 3532 i915_gem_vma_destroy(vma); 3533 3534 /* Since the unbound list is global, only move to that list if 3535 * no more VMAs exist. */ 3536 if (list_empty(&obj->vma_list)) 3537 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3538 3539 /* And finally now the object is completely decoupled from this vma, 3540 * we can drop its hold on the backing storage and allow it to be 3541 * reaped by the shrinker. 3542 */ 3543 i915_gem_object_unpin_pages(obj); 3544 3545 return 0; 3546 } 3547 3548 int i915_vma_unbind(struct i915_vma *vma) 3549 { 3550 return __i915_vma_unbind(vma, true); 3551 } 3552 3553 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3554 { 3555 return __i915_vma_unbind(vma, false); 3556 } 3557 3558 int i915_gpu_idle(struct drm_device *dev) 3559 { 3560 struct drm_i915_private *dev_priv = dev->dev_private; 3561 struct intel_engine_cs *engine; 3562 int ret; 3563 3564 /* Flush everything onto the inactive list. */ 3565 for_each_engine(engine, dev_priv) { 3566 if (!i915.enable_execlists) { 3567 struct drm_i915_gem_request *req; 3568 3569 req = i915_gem_request_alloc(engine, NULL); 3570 if (IS_ERR(req)) 3571 return PTR_ERR(req); 3572 3573 ret = i915_switch_context(req); 3574 i915_add_request_no_flush(req); 3575 if (ret) 3576 return ret; 3577 } 3578 3579 ret = intel_engine_idle(engine); 3580 if (ret) 3581 return ret; 3582 } 3583 3584 WARN_ON(i915_verify_lists(dev)); 3585 return 0; 3586 } 3587 3588 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3589 unsigned long cache_level) 3590 { 3591 struct drm_mm_node *gtt_space = &vma->node; 3592 struct drm_mm_node *other; 3593 3594 /* 3595 * On some machines we have to be careful when putting differing types 3596 * of snoopable memory together to avoid the prefetcher crossing memory 3597 * domains and dying. During vm initialisation, we decide whether or not 3598 * these constraints apply and set the drm_mm.color_adjust 3599 * appropriately. 3600 */ 3601 if (vma->vm->mm.color_adjust == NULL) 3602 return true; 3603 3604 if (!drm_mm_node_allocated(gtt_space)) 3605 return true; 3606 3607 if (list_empty(>t_space->node_list)) 3608 return true; 3609 3610 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3611 if (other->allocated && !other->hole_follows && other->color != cache_level) 3612 return false; 3613 3614 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3615 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3616 return false; 3617 3618 return true; 3619 } 3620 3621 /** 3622 * Finds free space in the GTT aperture and binds the object or a view of it 3623 * there. 3624 */ 3625 static struct i915_vma * 3626 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3627 struct i915_address_space *vm, 3628 const struct i915_ggtt_view *ggtt_view, 3629 unsigned alignment, 3630 uint64_t flags) 3631 { 3632 struct drm_device *dev = obj->base.dev; 3633 struct drm_i915_private *dev_priv = to_i915(dev); 3634 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3635 u32 fence_alignment, unfenced_alignment; 3636 u32 search_flag, alloc_flag; 3637 u64 start, end; 3638 u64 size, fence_size; 3639 struct i915_vma *vma; 3640 int ret; 3641 3642 if (i915_is_ggtt(vm)) { 3643 u32 view_size; 3644 3645 if (WARN_ON(!ggtt_view)) 3646 return ERR_PTR(-EINVAL); 3647 3648 view_size = i915_ggtt_view_size(obj, ggtt_view); 3649 3650 fence_size = i915_gem_get_gtt_size(dev, 3651 view_size, 3652 obj->tiling_mode); 3653 fence_alignment = i915_gem_get_gtt_alignment(dev, 3654 view_size, 3655 obj->tiling_mode, 3656 true); 3657 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3658 view_size, 3659 obj->tiling_mode, 3660 false); 3661 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3662 } else { 3663 fence_size = i915_gem_get_gtt_size(dev, 3664 obj->base.size, 3665 obj->tiling_mode); 3666 fence_alignment = i915_gem_get_gtt_alignment(dev, 3667 obj->base.size, 3668 obj->tiling_mode, 3669 true); 3670 unfenced_alignment = 3671 i915_gem_get_gtt_alignment(dev, 3672 obj->base.size, 3673 obj->tiling_mode, 3674 false); 3675 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3676 } 3677 3678 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3679 end = vm->total; 3680 if (flags & PIN_MAPPABLE) 3681 end = min_t(u64, end, ggtt->mappable_end); 3682 if (flags & PIN_ZONE_4G) 3683 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3684 3685 if (alignment == 0) 3686 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3687 unfenced_alignment; 3688 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3689 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3690 ggtt_view ? ggtt_view->type : 0, 3691 alignment); 3692 return ERR_PTR(-EINVAL); 3693 } 3694 3695 /* If binding the object/GGTT view requires more space than the entire 3696 * aperture has, reject it early before evicting everything in a vain 3697 * attempt to find space. 3698 */ 3699 if (size > end) { 3700 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n", 3701 ggtt_view ? ggtt_view->type : 0, 3702 size, 3703 flags & PIN_MAPPABLE ? "mappable" : "total", 3704 end); 3705 return ERR_PTR(-E2BIG); 3706 } 3707 3708 ret = i915_gem_object_get_pages(obj); 3709 if (ret) 3710 return ERR_PTR(ret); 3711 3712 i915_gem_object_pin_pages(obj); 3713 3714 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3715 i915_gem_obj_lookup_or_create_vma(obj, vm); 3716 3717 if (IS_ERR(vma)) 3718 goto err_unpin; 3719 3720 if (flags & PIN_OFFSET_FIXED) { 3721 uint64_t offset = flags & PIN_OFFSET_MASK; 3722 3723 if (offset & (alignment - 1) || offset + size > end) { 3724 ret = -EINVAL; 3725 goto err_free_vma; 3726 } 3727 vma->node.start = offset; 3728 vma->node.size = size; 3729 vma->node.color = obj->cache_level; 3730 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3731 if (ret) { 3732 ret = i915_gem_evict_for_vma(vma); 3733 if (ret == 0) 3734 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3735 } 3736 if (ret) 3737 goto err_free_vma; 3738 } else { 3739 if (flags & PIN_HIGH) { 3740 search_flag = DRM_MM_SEARCH_BELOW; 3741 alloc_flag = DRM_MM_CREATE_TOP; 3742 } else { 3743 search_flag = DRM_MM_SEARCH_DEFAULT; 3744 alloc_flag = DRM_MM_CREATE_DEFAULT; 3745 } 3746 3747 search_free: 3748 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3749 size, alignment, 3750 obj->cache_level, 3751 start, end, 3752 search_flag, 3753 alloc_flag); 3754 if (ret) { 3755 ret = i915_gem_evict_something(dev, vm, size, alignment, 3756 obj->cache_level, 3757 start, end, 3758 flags); 3759 if (ret == 0) 3760 goto search_free; 3761 3762 goto err_free_vma; 3763 } 3764 } 3765 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3766 ret = -EINVAL; 3767 goto err_remove_node; 3768 } 3769 3770 trace_i915_vma_bind(vma, flags); 3771 ret = i915_vma_bind(vma, obj->cache_level, flags); 3772 if (ret) 3773 goto err_remove_node; 3774 3775 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3776 list_add_tail(&vma->vm_link, &vm->inactive_list); 3777 3778 return vma; 3779 3780 err_remove_node: 3781 drm_mm_remove_node(&vma->node); 3782 err_free_vma: 3783 i915_gem_vma_destroy(vma); 3784 vma = ERR_PTR(ret); 3785 err_unpin: 3786 i915_gem_object_unpin_pages(obj); 3787 return vma; 3788 } 3789 3790 bool 3791 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3792 bool force) 3793 { 3794 /* If we don't have a page list set up, then we're not pinned 3795 * to GPU, and we can ignore the cache flush because it'll happen 3796 * again at bind time. 3797 */ 3798 if (obj->pages == NULL) 3799 return false; 3800 3801 /* 3802 * Stolen memory is always coherent with the GPU as it is explicitly 3803 * marked as wc by the system, or the system is cache-coherent. 3804 */ 3805 if (obj->stolen || obj->phys_handle) 3806 return false; 3807 3808 /* If the GPU is snooping the contents of the CPU cache, 3809 * we do not need to manually clear the CPU cache lines. However, 3810 * the caches are only snooped when the render cache is 3811 * flushed/invalidated. As we always have to emit invalidations 3812 * and flushes when moving into and out of the RENDER domain, correct 3813 * snooping behaviour occurs naturally as the result of our domain 3814 * tracking. 3815 */ 3816 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3817 obj->cache_dirty = true; 3818 return false; 3819 } 3820 3821 trace_i915_gem_object_clflush(obj); 3822 drm_clflush_sg(obj->pages); 3823 obj->cache_dirty = false; 3824 3825 return true; 3826 } 3827 3828 /** Flushes the GTT write domain for the object if it's dirty. */ 3829 static void 3830 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3831 { 3832 uint32_t old_write_domain; 3833 3834 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3835 return; 3836 3837 /* No actual flushing is required for the GTT write domain. Writes 3838 * to it immediately go to main memory as far as we know, so there's 3839 * no chipset flush. It also doesn't land in render cache. 3840 * 3841 * However, we do have to enforce the order so that all writes through 3842 * the GTT land before any writes to the device, such as updates to 3843 * the GATT itself. 3844 */ 3845 wmb(); 3846 3847 old_write_domain = obj->base.write_domain; 3848 obj->base.write_domain = 0; 3849 3850 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3851 3852 trace_i915_gem_object_change_domain(obj, 3853 obj->base.read_domains, 3854 old_write_domain); 3855 } 3856 3857 /** Flushes the CPU write domain for the object if it's dirty. */ 3858 static void 3859 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3860 { 3861 uint32_t old_write_domain; 3862 3863 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3864 return; 3865 3866 if (i915_gem_clflush_object(obj, obj->pin_display)) 3867 i915_gem_chipset_flush(obj->base.dev); 3868 3869 old_write_domain = obj->base.write_domain; 3870 obj->base.write_domain = 0; 3871 3872 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3873 3874 trace_i915_gem_object_change_domain(obj, 3875 obj->base.read_domains, 3876 old_write_domain); 3877 } 3878 3879 /** 3880 * Moves a single object to the GTT read, and possibly write domain. 3881 * 3882 * This function returns when the move is complete, including waiting on 3883 * flushes to occur. 3884 */ 3885 int 3886 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3887 { 3888 struct drm_device *dev = obj->base.dev; 3889 struct drm_i915_private *dev_priv = to_i915(dev); 3890 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3891 uint32_t old_write_domain, old_read_domains; 3892 struct i915_vma *vma; 3893 int ret; 3894 3895 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3896 return 0; 3897 3898 ret = i915_gem_object_wait_rendering(obj, !write); 3899 if (ret) 3900 return ret; 3901 3902 /* Flush and acquire obj->pages so that we are coherent through 3903 * direct access in memory with previous cached writes through 3904 * shmemfs and that our cache domain tracking remains valid. 3905 * For example, if the obj->filp was moved to swap without us 3906 * being notified and releasing the pages, we would mistakenly 3907 * continue to assume that the obj remained out of the CPU cached 3908 * domain. 3909 */ 3910 ret = i915_gem_object_get_pages(obj); 3911 if (ret) 3912 return ret; 3913 3914 i915_gem_object_flush_cpu_write_domain(obj); 3915 3916 /* Serialise direct access to this object with the barriers for 3917 * coherent writes from the GPU, by effectively invalidating the 3918 * GTT domain upon first access. 3919 */ 3920 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3921 mb(); 3922 3923 old_write_domain = obj->base.write_domain; 3924 old_read_domains = obj->base.read_domains; 3925 3926 /* It should now be out of any other write domains, and we can update 3927 * the domain values for our changes. 3928 */ 3929 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3930 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3931 if (write) { 3932 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3933 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3934 obj->dirty = 1; 3935 } 3936 3937 trace_i915_gem_object_change_domain(obj, 3938 old_read_domains, 3939 old_write_domain); 3940 3941 /* And bump the LRU for this access */ 3942 vma = i915_gem_obj_to_ggtt(obj); 3943 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3944 list_move_tail(&vma->vm_link, 3945 &ggtt->base.inactive_list); 3946 3947 return 0; 3948 } 3949 3950 /** 3951 * Changes the cache-level of an object across all VMA. 3952 * 3953 * After this function returns, the object will be in the new cache-level 3954 * across all GTT and the contents of the backing storage will be coherent, 3955 * with respect to the new cache-level. In order to keep the backing storage 3956 * coherent for all users, we only allow a single cache level to be set 3957 * globally on the object and prevent it from being changed whilst the 3958 * hardware is reading from the object. That is if the object is currently 3959 * on the scanout it will be set to uncached (or equivalent display 3960 * cache coherency) and all non-MOCS GPU access will also be uncached so 3961 * that all direct access to the scanout remains coherent. 3962 */ 3963 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3964 enum i915_cache_level cache_level) 3965 { 3966 struct drm_device *dev = obj->base.dev; 3967 struct i915_vma *vma, *next; 3968 bool bound = false; 3969 int ret = 0; 3970 3971 if (obj->cache_level == cache_level) 3972 goto out; 3973 3974 /* Inspect the list of currently bound VMA and unbind any that would 3975 * be invalid given the new cache-level. This is principally to 3976 * catch the issue of the CS prefetch crossing page boundaries and 3977 * reading an invalid PTE on older architectures. 3978 */ 3979 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3980 if (!drm_mm_node_allocated(&vma->node)) 3981 continue; 3982 3983 if (vma->pin_count) { 3984 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3985 return -EBUSY; 3986 } 3987 3988 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3989 ret = i915_vma_unbind(vma); 3990 if (ret) 3991 return ret; 3992 } else 3993 bound = true; 3994 } 3995 3996 /* We can reuse the existing drm_mm nodes but need to change the 3997 * cache-level on the PTE. We could simply unbind them all and 3998 * rebind with the correct cache-level on next use. However since 3999 * we already have a valid slot, dma mapping, pages etc, we may as 4000 * rewrite the PTE in the belief that doing so tramples upon less 4001 * state and so involves less work. 4002 */ 4003 if (bound) { 4004 /* Before we change the PTE, the GPU must not be accessing it. 4005 * If we wait upon the object, we know that all the bound 4006 * VMA are no longer active. 4007 */ 4008 ret = i915_gem_object_wait_rendering(obj, false); 4009 if (ret) 4010 return ret; 4011 4012 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4013 /* Access to snoopable pages through the GTT is 4014 * incoherent and on some machines causes a hard 4015 * lockup. Relinquish the CPU mmaping to force 4016 * userspace to refault in the pages and we can 4017 * then double check if the GTT mapping is still 4018 * valid for that pointer access. 4019 */ 4020 i915_gem_release_mmap(obj); 4021 4022 /* As we no longer need a fence for GTT access, 4023 * we can relinquish it now (and so prevent having 4024 * to steal a fence from someone else on the next 4025 * fence request). Note GPU activity would have 4026 * dropped the fence as all snoopable access is 4027 * supposed to be linear. 4028 */ 4029 ret = i915_gem_object_put_fence(obj); 4030 if (ret) 4031 return ret; 4032 } else { 4033 /* We either have incoherent backing store and 4034 * so no GTT access or the architecture is fully 4035 * coherent. In such cases, existing GTT mmaps 4036 * ignore the cache bit in the PTE and we can 4037 * rewrite it without confusing the GPU or having 4038 * to force userspace to fault back in its mmaps. 4039 */ 4040 } 4041 4042 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4043 if (!drm_mm_node_allocated(&vma->node)) 4044 continue; 4045 4046 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4047 if (ret) 4048 return ret; 4049 } 4050 } 4051 4052 list_for_each_entry(vma, &obj->vma_list, obj_link) 4053 vma->node.color = cache_level; 4054 obj->cache_level = cache_level; 4055 4056 out: 4057 /* Flush the dirty CPU caches to the backing storage so that the 4058 * object is now coherent at its new cache level (with respect 4059 * to the access domain). 4060 */ 4061 if (obj->cache_dirty && 4062 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4063 cpu_write_needs_clflush(obj)) { 4064 if (i915_gem_clflush_object(obj, true)) 4065 i915_gem_chipset_flush(obj->base.dev); 4066 } 4067 4068 return 0; 4069 } 4070 4071 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4072 struct drm_file *file) 4073 { 4074 struct drm_i915_gem_caching *args = data; 4075 struct drm_i915_gem_object *obj; 4076 4077 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4078 if (&obj->base == NULL) 4079 return -ENOENT; 4080 4081 switch (obj->cache_level) { 4082 case I915_CACHE_LLC: 4083 case I915_CACHE_L3_LLC: 4084 args->caching = I915_CACHING_CACHED; 4085 break; 4086 4087 case I915_CACHE_WT: 4088 args->caching = I915_CACHING_DISPLAY; 4089 break; 4090 4091 default: 4092 args->caching = I915_CACHING_NONE; 4093 break; 4094 } 4095 4096 drm_gem_object_unreference_unlocked(&obj->base); 4097 return 0; 4098 } 4099 4100 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4101 struct drm_file *file) 4102 { 4103 struct drm_i915_private *dev_priv = dev->dev_private; 4104 struct drm_i915_gem_caching *args = data; 4105 struct drm_i915_gem_object *obj; 4106 enum i915_cache_level level; 4107 int ret; 4108 4109 switch (args->caching) { 4110 case I915_CACHING_NONE: 4111 level = I915_CACHE_NONE; 4112 break; 4113 case I915_CACHING_CACHED: 4114 /* 4115 * Due to a HW issue on BXT A stepping, GPU stores via a 4116 * snooped mapping may leave stale data in a corresponding CPU 4117 * cacheline, whereas normally such cachelines would get 4118 * invalidated. 4119 */ 4120 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 4121 return -ENODEV; 4122 4123 level = I915_CACHE_LLC; 4124 break; 4125 case I915_CACHING_DISPLAY: 4126 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4127 break; 4128 default: 4129 return -EINVAL; 4130 } 4131 4132 intel_runtime_pm_get(dev_priv); 4133 4134 ret = i915_mutex_lock_interruptible(dev); 4135 if (ret) 4136 goto rpm_put; 4137 4138 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4139 if (&obj->base == NULL) { 4140 ret = -ENOENT; 4141 goto unlock; 4142 } 4143 4144 ret = i915_gem_object_set_cache_level(obj, level); 4145 4146 drm_gem_object_unreference(&obj->base); 4147 unlock: 4148 mutex_unlock(&dev->struct_mutex); 4149 rpm_put: 4150 intel_runtime_pm_put(dev_priv); 4151 4152 return ret; 4153 } 4154 4155 /* 4156 * Prepare buffer for display plane (scanout, cursors, etc). 4157 * Can be called from an uninterruptible phase (modesetting) and allows 4158 * any flushes to be pipelined (for pageflips). 4159 */ 4160 int 4161 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4162 u32 alignment, 4163 const struct i915_ggtt_view *view) 4164 { 4165 u32 old_read_domains, old_write_domain; 4166 int ret; 4167 4168 /* Mark the pin_display early so that we account for the 4169 * display coherency whilst setting up the cache domains. 4170 */ 4171 obj->pin_display++; 4172 4173 /* The display engine is not coherent with the LLC cache on gen6. As 4174 * a result, we make sure that the pinning that is about to occur is 4175 * done with uncached PTEs. This is lowest common denominator for all 4176 * chipsets. 4177 * 4178 * However for gen6+, we could do better by using the GFDT bit instead 4179 * of uncaching, which would allow us to flush all the LLC-cached data 4180 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4181 */ 4182 ret = i915_gem_object_set_cache_level(obj, 4183 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4184 if (ret) 4185 goto err_unpin_display; 4186 4187 /* As the user may map the buffer once pinned in the display plane 4188 * (e.g. libkms for the bootup splash), we have to ensure that we 4189 * always use map_and_fenceable for all scanout buffers. 4190 */ 4191 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4192 view->type == I915_GGTT_VIEW_NORMAL ? 4193 PIN_MAPPABLE : 0); 4194 if (ret) 4195 goto err_unpin_display; 4196 4197 i915_gem_object_flush_cpu_write_domain(obj); 4198 4199 old_write_domain = obj->base.write_domain; 4200 old_read_domains = obj->base.read_domains; 4201 4202 /* It should now be out of any other write domains, and we can update 4203 * the domain values for our changes. 4204 */ 4205 obj->base.write_domain = 0; 4206 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4207 4208 trace_i915_gem_object_change_domain(obj, 4209 old_read_domains, 4210 old_write_domain); 4211 4212 return 0; 4213 4214 err_unpin_display: 4215 obj->pin_display--; 4216 return ret; 4217 } 4218 4219 void 4220 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4221 const struct i915_ggtt_view *view) 4222 { 4223 if (WARN_ON(obj->pin_display == 0)) 4224 return; 4225 4226 i915_gem_object_ggtt_unpin_view(obj, view); 4227 4228 obj->pin_display--; 4229 } 4230 4231 /** 4232 * Moves a single object to the CPU read, and possibly write domain. 4233 * 4234 * This function returns when the move is complete, including waiting on 4235 * flushes to occur. 4236 */ 4237 int 4238 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4239 { 4240 uint32_t old_write_domain, old_read_domains; 4241 int ret; 4242 4243 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4244 return 0; 4245 4246 ret = i915_gem_object_wait_rendering(obj, !write); 4247 if (ret) 4248 return ret; 4249 4250 i915_gem_object_flush_gtt_write_domain(obj); 4251 4252 old_write_domain = obj->base.write_domain; 4253 old_read_domains = obj->base.read_domains; 4254 4255 /* Flush the CPU cache if it's still invalid. */ 4256 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4257 i915_gem_clflush_object(obj, false); 4258 4259 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4260 } 4261 4262 /* It should now be out of any other write domains, and we can update 4263 * the domain values for our changes. 4264 */ 4265 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4266 4267 /* If we're writing through the CPU, then the GPU read domains will 4268 * need to be invalidated at next use. 4269 */ 4270 if (write) { 4271 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4272 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4273 } 4274 4275 trace_i915_gem_object_change_domain(obj, 4276 old_read_domains, 4277 old_write_domain); 4278 4279 return 0; 4280 } 4281 4282 /* Throttle our rendering by waiting until the ring has completed our requests 4283 * emitted over 20 msec ago. 4284 * 4285 * Note that if we were to use the current jiffies each time around the loop, 4286 * we wouldn't escape the function with any frames outstanding if the time to 4287 * render a frame was over 20ms. 4288 * 4289 * This should get us reasonable parallelism between CPU and GPU but also 4290 * relatively low latency when blocking on a particular request to finish. 4291 */ 4292 static int 4293 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4294 { 4295 struct drm_i915_private *dev_priv = dev->dev_private; 4296 struct drm_i915_file_private *file_priv = file->driver_priv; 4297 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4298 struct drm_i915_gem_request *request, *target = NULL; 4299 int ret; 4300 4301 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4302 if (ret) 4303 return ret; 4304 4305 /* ABI: return -EIO if already wedged */ 4306 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4307 return -EIO; 4308 4309 spin_lock(&file_priv->mm.lock); 4310 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4311 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4312 break; 4313 4314 /* 4315 * Note that the request might not have been submitted yet. 4316 * In which case emitted_jiffies will be zero. 4317 */ 4318 if (!request->emitted_jiffies) 4319 continue; 4320 4321 target = request; 4322 } 4323 if (target) 4324 i915_gem_request_reference(target); 4325 spin_unlock(&file_priv->mm.lock); 4326 4327 if (target == NULL) 4328 return 0; 4329 4330 ret = __i915_wait_request(target, true, NULL, NULL); 4331 if (ret == 0) 4332 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4333 4334 i915_gem_request_unreference__unlocked(target); 4335 4336 return ret; 4337 } 4338 4339 static bool 4340 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4341 { 4342 struct drm_i915_gem_object *obj = vma->obj; 4343 4344 if (alignment && 4345 vma->node.start & (alignment - 1)) 4346 return true; 4347 4348 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4349 return true; 4350 4351 if (flags & PIN_OFFSET_BIAS && 4352 vma->node.start < (flags & PIN_OFFSET_MASK)) 4353 return true; 4354 4355 if (flags & PIN_OFFSET_FIXED && 4356 vma->node.start != (flags & PIN_OFFSET_MASK)) 4357 return true; 4358 4359 return false; 4360 } 4361 4362 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4363 { 4364 struct drm_i915_gem_object *obj = vma->obj; 4365 bool mappable, fenceable; 4366 u32 fence_size, fence_alignment; 4367 4368 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4369 obj->base.size, 4370 obj->tiling_mode); 4371 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4372 obj->base.size, 4373 obj->tiling_mode, 4374 true); 4375 4376 fenceable = (vma->node.size == fence_size && 4377 (vma->node.start & (fence_alignment - 1)) == 0); 4378 4379 mappable = (vma->node.start + fence_size <= 4380 to_i915(obj->base.dev)->ggtt.mappable_end); 4381 4382 obj->map_and_fenceable = mappable && fenceable; 4383 } 4384 4385 static int 4386 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4387 struct i915_address_space *vm, 4388 const struct i915_ggtt_view *ggtt_view, 4389 uint32_t alignment, 4390 uint64_t flags) 4391 { 4392 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4393 struct i915_vma *vma; 4394 unsigned bound; 4395 int ret; 4396 4397 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4398 return -ENODEV; 4399 4400 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4401 return -EINVAL; 4402 4403 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4404 return -EINVAL; 4405 4406 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4407 return -EINVAL; 4408 4409 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4410 i915_gem_obj_to_vma(obj, vm); 4411 4412 if (vma) { 4413 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4414 return -EBUSY; 4415 4416 if (i915_vma_misplaced(vma, alignment, flags)) { 4417 WARN(vma->pin_count, 4418 "bo is already pinned in %s with incorrect alignment:" 4419 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4420 " obj->map_and_fenceable=%d\n", 4421 ggtt_view ? "ggtt" : "ppgtt", 4422 upper_32_bits(vma->node.start), 4423 lower_32_bits(vma->node.start), 4424 alignment, 4425 !!(flags & PIN_MAPPABLE), 4426 obj->map_and_fenceable); 4427 ret = i915_vma_unbind(vma); 4428 if (ret) 4429 return ret; 4430 4431 vma = NULL; 4432 } 4433 } 4434 4435 bound = vma ? vma->bound : 0; 4436 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4437 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4438 flags); 4439 if (IS_ERR(vma)) 4440 return PTR_ERR(vma); 4441 } else { 4442 ret = i915_vma_bind(vma, obj->cache_level, flags); 4443 if (ret) 4444 return ret; 4445 } 4446 4447 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4448 (bound ^ vma->bound) & GLOBAL_BIND) { 4449 __i915_vma_set_map_and_fenceable(vma); 4450 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4451 } 4452 4453 vma->pin_count++; 4454 return 0; 4455 } 4456 4457 int 4458 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4459 struct i915_address_space *vm, 4460 uint32_t alignment, 4461 uint64_t flags) 4462 { 4463 return i915_gem_object_do_pin(obj, vm, 4464 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4465 alignment, flags); 4466 } 4467 4468 int 4469 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4470 const struct i915_ggtt_view *view, 4471 uint32_t alignment, 4472 uint64_t flags) 4473 { 4474 struct drm_device *dev = obj->base.dev; 4475 struct drm_i915_private *dev_priv = to_i915(dev); 4476 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4477 4478 BUG_ON(!view); 4479 4480 return i915_gem_object_do_pin(obj, &ggtt->base, view, 4481 alignment, flags | PIN_GLOBAL); 4482 } 4483 4484 void 4485 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4486 const struct i915_ggtt_view *view) 4487 { 4488 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4489 4490 WARN_ON(vma->pin_count == 0); 4491 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4492 4493 --vma->pin_count; 4494 } 4495 4496 int 4497 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4498 struct drm_file *file) 4499 { 4500 struct drm_i915_gem_busy *args = data; 4501 struct drm_i915_gem_object *obj; 4502 int ret; 4503 4504 ret = i915_mutex_lock_interruptible(dev); 4505 if (ret) 4506 return ret; 4507 4508 obj = to_intel_bo(drm_gem_object_lookup(file, args->handle)); 4509 if (&obj->base == NULL) { 4510 ret = -ENOENT; 4511 goto unlock; 4512 } 4513 4514 /* Count all active objects as busy, even if they are currently not used 4515 * by the gpu. Users of this interface expect objects to eventually 4516 * become non-busy without any further actions, therefore emit any 4517 * necessary flushes here. 4518 */ 4519 ret = i915_gem_object_flush_active(obj); 4520 if (ret) 4521 goto unref; 4522 4523 args->busy = 0; 4524 if (obj->active) { 4525 int i; 4526 4527 for (i = 0; i < I915_NUM_ENGINES; i++) { 4528 struct drm_i915_gem_request *req; 4529 4530 req = obj->last_read_req[i]; 4531 if (req) 4532 args->busy |= 1 << (16 + req->engine->exec_id); 4533 } 4534 if (obj->last_write_req) 4535 args->busy |= obj->last_write_req->engine->exec_id; 4536 } 4537 4538 unref: 4539 drm_gem_object_unreference(&obj->base); 4540 unlock: 4541 mutex_unlock(&dev->struct_mutex); 4542 return ret; 4543 } 4544 4545 int 4546 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4547 struct drm_file *file_priv) 4548 { 4549 return i915_gem_ring_throttle(dev, file_priv); 4550 } 4551 4552 int 4553 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4554 struct drm_file *file_priv) 4555 { 4556 struct drm_i915_private *dev_priv = dev->dev_private; 4557 struct drm_i915_gem_madvise *args = data; 4558 struct drm_i915_gem_object *obj; 4559 int ret; 4560 4561 switch (args->madv) { 4562 case I915_MADV_DONTNEED: 4563 case I915_MADV_WILLNEED: 4564 break; 4565 default: 4566 return -EINVAL; 4567 } 4568 4569 ret = i915_mutex_lock_interruptible(dev); 4570 if (ret) 4571 return ret; 4572 4573 obj = to_intel_bo(drm_gem_object_lookup(file_priv, args->handle)); 4574 if (&obj->base == NULL) { 4575 ret = -ENOENT; 4576 goto unlock; 4577 } 4578 4579 if (i915_gem_obj_is_pinned(obj)) { 4580 ret = -EINVAL; 4581 goto out; 4582 } 4583 4584 if (obj->pages && 4585 obj->tiling_mode != I915_TILING_NONE && 4586 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4587 if (obj->madv == I915_MADV_WILLNEED) 4588 i915_gem_object_unpin_pages(obj); 4589 if (args->madv == I915_MADV_WILLNEED) 4590 i915_gem_object_pin_pages(obj); 4591 } 4592 4593 if (obj->madv != __I915_MADV_PURGED) 4594 obj->madv = args->madv; 4595 4596 /* if the object is no longer attached, discard its backing storage */ 4597 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4598 i915_gem_object_truncate(obj); 4599 4600 args->retained = obj->madv != __I915_MADV_PURGED; 4601 4602 out: 4603 drm_gem_object_unreference(&obj->base); 4604 unlock: 4605 mutex_unlock(&dev->struct_mutex); 4606 return ret; 4607 } 4608 4609 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4610 const struct drm_i915_gem_object_ops *ops) 4611 { 4612 int i; 4613 4614 INIT_LIST_HEAD(&obj->global_list); 4615 for (i = 0; i < I915_NUM_ENGINES; i++) 4616 INIT_LIST_HEAD(&obj->engine_list[i]); 4617 INIT_LIST_HEAD(&obj->obj_exec_link); 4618 INIT_LIST_HEAD(&obj->vma_list); 4619 INIT_LIST_HEAD(&obj->batch_pool_link); 4620 4621 obj->ops = ops; 4622 4623 obj->fence_reg = I915_FENCE_REG_NONE; 4624 obj->madv = I915_MADV_WILLNEED; 4625 4626 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4627 } 4628 4629 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4630 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4631 .get_pages = i915_gem_object_get_pages_gtt, 4632 .put_pages = i915_gem_object_put_pages_gtt, 4633 }; 4634 4635 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4636 size_t size) 4637 { 4638 struct drm_i915_gem_object *obj; 4639 #if 0 4640 struct address_space *mapping; 4641 gfp_t mask; 4642 #endif 4643 4644 obj = i915_gem_object_alloc(dev); 4645 if (obj == NULL) 4646 return NULL; 4647 4648 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4649 i915_gem_object_free(obj); 4650 return NULL; 4651 } 4652 4653 #if 0 4654 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4655 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4656 /* 965gm cannot relocate objects above 4GiB. */ 4657 mask &= ~__GFP_HIGHMEM; 4658 mask |= __GFP_DMA32; 4659 } 4660 4661 mapping = file_inode(obj->base.filp)->i_mapping; 4662 mapping_set_gfp_mask(mapping, mask); 4663 #endif 4664 4665 i915_gem_object_init(obj, &i915_gem_object_ops); 4666 4667 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4668 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4669 4670 if (HAS_LLC(dev)) { 4671 /* On some devices, we can have the GPU use the LLC (the CPU 4672 * cache) for about a 10% performance improvement 4673 * compared to uncached. Graphics requests other than 4674 * display scanout are coherent with the CPU in 4675 * accessing this cache. This means in this mode we 4676 * don't need to clflush on the CPU side, and on the 4677 * GPU side we only need to flush internal caches to 4678 * get data visible to the CPU. 4679 * 4680 * However, we maintain the display planes as UC, and so 4681 * need to rebind when first used as such. 4682 */ 4683 obj->cache_level = I915_CACHE_LLC; 4684 } else 4685 obj->cache_level = I915_CACHE_NONE; 4686 4687 trace_i915_gem_object_create(obj); 4688 4689 return obj; 4690 } 4691 4692 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4693 { 4694 /* If we are the last user of the backing storage (be it shmemfs 4695 * pages or stolen etc), we know that the pages are going to be 4696 * immediately released. In this case, we can then skip copying 4697 * back the contents from the GPU. 4698 */ 4699 4700 if (obj->madv != I915_MADV_WILLNEED) 4701 return false; 4702 4703 if (obj->base.vm_obj == NULL) 4704 return true; 4705 4706 /* At first glance, this looks racy, but then again so would be 4707 * userspace racing mmap against close. However, the first external 4708 * reference to the filp can only be obtained through the 4709 * i915_gem_mmap_ioctl() which safeguards us against the user 4710 * acquiring such a reference whilst we are in the middle of 4711 * freeing the object. 4712 */ 4713 #if 0 4714 return atomic_long_read(&obj->base.filp->f_count) == 1; 4715 #else 4716 return false; 4717 #endif 4718 } 4719 4720 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4721 { 4722 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4723 struct drm_device *dev = obj->base.dev; 4724 struct drm_i915_private *dev_priv = dev->dev_private; 4725 struct i915_vma *vma, *next; 4726 4727 intel_runtime_pm_get(dev_priv); 4728 4729 trace_i915_gem_object_destroy(obj); 4730 4731 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4732 int ret; 4733 4734 vma->pin_count = 0; 4735 ret = i915_vma_unbind(vma); 4736 if (WARN_ON(ret == -ERESTARTSYS)) { 4737 bool was_interruptible; 4738 4739 was_interruptible = dev_priv->mm.interruptible; 4740 dev_priv->mm.interruptible = false; 4741 4742 WARN_ON(i915_vma_unbind(vma)); 4743 4744 dev_priv->mm.interruptible = was_interruptible; 4745 } 4746 } 4747 4748 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4749 * before progressing. */ 4750 if (obj->stolen) 4751 i915_gem_object_unpin_pages(obj); 4752 4753 WARN_ON(obj->frontbuffer_bits); 4754 4755 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4756 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4757 obj->tiling_mode != I915_TILING_NONE) 4758 i915_gem_object_unpin_pages(obj); 4759 4760 if (WARN_ON(obj->pages_pin_count)) 4761 obj->pages_pin_count = 0; 4762 if (discard_backing_storage(obj)) 4763 obj->madv = I915_MADV_DONTNEED; 4764 i915_gem_object_put_pages(obj); 4765 i915_gem_object_free_mmap_offset(obj); 4766 4767 BUG_ON(obj->pages); 4768 4769 #if 0 4770 if (obj->base.import_attach) 4771 drm_prime_gem_destroy(&obj->base, NULL); 4772 #endif 4773 4774 if (obj->ops->release) 4775 obj->ops->release(obj); 4776 4777 drm_gem_object_release(&obj->base); 4778 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4779 4780 kfree(obj->bit_17); 4781 i915_gem_object_free(obj); 4782 4783 intel_runtime_pm_put(dev_priv); 4784 } 4785 4786 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4787 struct i915_address_space *vm) 4788 { 4789 struct i915_vma *vma; 4790 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4791 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && 4792 vma->vm == vm) 4793 return vma; 4794 } 4795 return NULL; 4796 } 4797 4798 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4799 const struct i915_ggtt_view *view) 4800 { 4801 struct drm_device *dev = obj->base.dev; 4802 struct drm_i915_private *dev_priv = to_i915(dev); 4803 struct i915_ggtt *ggtt = &dev_priv->ggtt; 4804 struct i915_vma *vma; 4805 4806 BUG_ON(!view); 4807 4808 list_for_each_entry(vma, &obj->vma_list, obj_link) 4809 if (vma->vm == &ggtt->base && 4810 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4811 return vma; 4812 return NULL; 4813 } 4814 4815 void i915_gem_vma_destroy(struct i915_vma *vma) 4816 { 4817 WARN_ON(vma->node.allocated); 4818 4819 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4820 if (!list_empty(&vma->exec_list)) 4821 return; 4822 4823 if (!vma->is_ggtt) 4824 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 4825 4826 list_del(&vma->obj_link); 4827 4828 kfree(vma); 4829 } 4830 4831 static void 4832 i915_gem_stop_engines(struct drm_device *dev) 4833 { 4834 struct drm_i915_private *dev_priv = dev->dev_private; 4835 struct intel_engine_cs *engine; 4836 4837 for_each_engine(engine, dev_priv) 4838 dev_priv->gt.stop_engine(engine); 4839 } 4840 4841 int 4842 i915_gem_suspend(struct drm_device *dev) 4843 { 4844 struct drm_i915_private *dev_priv = dev->dev_private; 4845 int ret = 0; 4846 4847 mutex_lock(&dev->struct_mutex); 4848 ret = i915_gpu_idle(dev); 4849 if (ret) 4850 goto err; 4851 4852 i915_gem_retire_requests(dev); 4853 4854 i915_gem_stop_engines(dev); 4855 mutex_unlock(&dev->struct_mutex); 4856 4857 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4858 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4859 #if 0 4860 flush_delayed_work(&dev_priv->mm.idle_work); 4861 #endif 4862 4863 /* Assert that we sucessfully flushed all the work and 4864 * reset the GPU back to its idle, low power state. 4865 */ 4866 WARN_ON(dev_priv->mm.busy); 4867 4868 return 0; 4869 4870 err: 4871 mutex_unlock(&dev->struct_mutex); 4872 return ret; 4873 } 4874 4875 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4876 { 4877 struct intel_engine_cs *engine = req->engine; 4878 struct drm_device *dev = engine->dev; 4879 struct drm_i915_private *dev_priv = dev->dev_private; 4880 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4881 int i, ret; 4882 4883 if (!HAS_L3_DPF(dev) || !remap_info) 4884 return 0; 4885 4886 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4887 if (ret) 4888 return ret; 4889 4890 /* 4891 * Note: We do not worry about the concurrent register cacheline hang 4892 * here because no other code should access these registers other than 4893 * at initialization time. 4894 */ 4895 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4896 intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); 4897 intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); 4898 intel_ring_emit(engine, remap_info[i]); 4899 } 4900 4901 intel_ring_advance(engine); 4902 4903 return ret; 4904 } 4905 4906 void i915_gem_init_swizzling(struct drm_device *dev) 4907 { 4908 struct drm_i915_private *dev_priv = dev->dev_private; 4909 4910 if (INTEL_INFO(dev)->gen < 5 || 4911 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4912 return; 4913 4914 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4915 DISP_TILE_SURFACE_SWIZZLING); 4916 4917 if (IS_GEN5(dev)) 4918 return; 4919 4920 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4921 if (IS_GEN6(dev)) 4922 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4923 else if (IS_GEN7(dev)) 4924 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4925 else if (IS_GEN8(dev)) 4926 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4927 else 4928 BUG(); 4929 } 4930 4931 static void init_unused_ring(struct drm_device *dev, u32 base) 4932 { 4933 struct drm_i915_private *dev_priv = dev->dev_private; 4934 4935 I915_WRITE(RING_CTL(base), 0); 4936 I915_WRITE(RING_HEAD(base), 0); 4937 I915_WRITE(RING_TAIL(base), 0); 4938 I915_WRITE(RING_START(base), 0); 4939 } 4940 4941 static void init_unused_rings(struct drm_device *dev) 4942 { 4943 if (IS_I830(dev)) { 4944 init_unused_ring(dev, PRB1_BASE); 4945 init_unused_ring(dev, SRB0_BASE); 4946 init_unused_ring(dev, SRB1_BASE); 4947 init_unused_ring(dev, SRB2_BASE); 4948 init_unused_ring(dev, SRB3_BASE); 4949 } else if (IS_GEN2(dev)) { 4950 init_unused_ring(dev, SRB0_BASE); 4951 init_unused_ring(dev, SRB1_BASE); 4952 } else if (IS_GEN3(dev)) { 4953 init_unused_ring(dev, PRB1_BASE); 4954 init_unused_ring(dev, PRB2_BASE); 4955 } 4956 } 4957 4958 int i915_gem_init_engines(struct drm_device *dev) 4959 { 4960 struct drm_i915_private *dev_priv = dev->dev_private; 4961 int ret; 4962 4963 ret = intel_init_render_ring_buffer(dev); 4964 if (ret) 4965 return ret; 4966 4967 if (HAS_BSD(dev)) { 4968 ret = intel_init_bsd_ring_buffer(dev); 4969 if (ret) 4970 goto cleanup_render_ring; 4971 } 4972 4973 if (HAS_BLT(dev)) { 4974 ret = intel_init_blt_ring_buffer(dev); 4975 if (ret) 4976 goto cleanup_bsd_ring; 4977 } 4978 4979 if (HAS_VEBOX(dev)) { 4980 ret = intel_init_vebox_ring_buffer(dev); 4981 if (ret) 4982 goto cleanup_blt_ring; 4983 } 4984 4985 if (HAS_BSD2(dev)) { 4986 ret = intel_init_bsd2_ring_buffer(dev); 4987 if (ret) 4988 goto cleanup_vebox_ring; 4989 } 4990 4991 return 0; 4992 4993 cleanup_vebox_ring: 4994 intel_cleanup_engine(&dev_priv->engine[VECS]); 4995 cleanup_blt_ring: 4996 intel_cleanup_engine(&dev_priv->engine[BCS]); 4997 cleanup_bsd_ring: 4998 intel_cleanup_engine(&dev_priv->engine[VCS]); 4999 cleanup_render_ring: 5000 intel_cleanup_engine(&dev_priv->engine[RCS]); 5001 5002 return ret; 5003 } 5004 5005 int 5006 i915_gem_init_hw(struct drm_device *dev) 5007 { 5008 struct drm_i915_private *dev_priv = dev->dev_private; 5009 struct intel_engine_cs *engine; 5010 int ret, j; 5011 5012 /* Double layer security blanket, see i915_gem_init() */ 5013 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5014 5015 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) 5016 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5017 5018 if (IS_HASWELL(dev)) 5019 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5020 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5021 5022 if (HAS_PCH_NOP(dev)) { 5023 if (IS_IVYBRIDGE(dev)) { 5024 u32 temp = I915_READ(GEN7_MSG_CTL); 5025 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5026 I915_WRITE(GEN7_MSG_CTL, temp); 5027 } else if (INTEL_INFO(dev)->gen >= 7) { 5028 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5029 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5030 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5031 } 5032 } 5033 5034 i915_gem_init_swizzling(dev); 5035 5036 /* 5037 * At least 830 can leave some of the unused rings 5038 * "active" (ie. head != tail) after resume which 5039 * will prevent c3 entry. Makes sure all unused rings 5040 * are totally idle. 5041 */ 5042 init_unused_rings(dev); 5043 5044 BUG_ON(!dev_priv->kernel_context); 5045 5046 ret = i915_ppgtt_init_hw(dev); 5047 if (ret) { 5048 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5049 goto out; 5050 } 5051 5052 /* Need to do basic initialisation of all rings first: */ 5053 for_each_engine(engine, dev_priv) { 5054 ret = engine->init_hw(engine); 5055 if (ret) 5056 goto out; 5057 } 5058 5059 intel_mocs_init_l3cc_table(dev); 5060 5061 /* We can't enable contexts until all firmware is loaded */ 5062 if (HAS_GUC_UCODE(dev)) { 5063 ret = intel_guc_ucode_load(dev); 5064 if (ret) { 5065 DRM_ERROR("Failed to initialize GuC, error %d\n", ret); 5066 ret = -EIO; 5067 goto out; 5068 } 5069 } 5070 5071 /* 5072 * Increment the next seqno by 0x100 so we have a visible break 5073 * on re-initialisation 5074 */ 5075 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5076 if (ret) 5077 goto out; 5078 5079 /* Now it is safe to go back round and do everything else: */ 5080 for_each_engine(engine, dev_priv) { 5081 struct drm_i915_gem_request *req; 5082 5083 req = i915_gem_request_alloc(engine, NULL); 5084 if (IS_ERR(req)) { 5085 ret = PTR_ERR(req); 5086 break; 5087 } 5088 5089 if (engine->id == RCS) { 5090 for (j = 0; j < NUM_L3_SLICES(dev); j++) { 5091 ret = i915_gem_l3_remap(req, j); 5092 if (ret) 5093 goto err_request; 5094 } 5095 } 5096 5097 ret = i915_ppgtt_init_ring(req); 5098 if (ret) 5099 goto err_request; 5100 5101 ret = i915_gem_context_enable(req); 5102 if (ret) 5103 goto err_request; 5104 5105 err_request: 5106 i915_add_request_no_flush(req); 5107 if (ret) { 5108 DRM_ERROR("Failed to enable %s, error=%d\n", 5109 engine->name, ret); 5110 i915_gem_cleanup_engines(dev); 5111 break; 5112 } 5113 } 5114 5115 out: 5116 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5117 return ret; 5118 } 5119 5120 int i915_gem_init(struct drm_device *dev) 5121 { 5122 struct drm_i915_private *dev_priv = dev->dev_private; 5123 int ret; 5124 5125 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5126 i915.enable_execlists); 5127 5128 mutex_lock(&dev->struct_mutex); 5129 5130 if (!i915.enable_execlists) { 5131 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5132 dev_priv->gt.init_engines = i915_gem_init_engines; 5133 dev_priv->gt.cleanup_engine = intel_cleanup_engine; 5134 dev_priv->gt.stop_engine = intel_stop_engine; 5135 } else { 5136 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5137 dev_priv->gt.init_engines = intel_logical_rings_init; 5138 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 5139 dev_priv->gt.stop_engine = intel_logical_ring_stop; 5140 } 5141 5142 /* This is just a security blanket to placate dragons. 5143 * On some systems, we very sporadically observe that the first TLBs 5144 * used by the CS may be stale, despite us poking the TLB reset. If 5145 * we hold the forcewake during initialisation these problems 5146 * just magically go away. 5147 */ 5148 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5149 5150 ret = i915_gem_init_userptr(dev); 5151 if (ret) 5152 goto out_unlock; 5153 5154 i915_gem_init_ggtt(dev); 5155 5156 ret = i915_gem_context_init(dev); 5157 if (ret) 5158 goto out_unlock; 5159 5160 ret = dev_priv->gt.init_engines(dev); 5161 if (ret) 5162 goto out_unlock; 5163 5164 ret = i915_gem_init_hw(dev); 5165 if (ret == -EIO) { 5166 /* Allow ring initialisation to fail by marking the GPU as 5167 * wedged. But we only want to do this where the GPU is angry, 5168 * for all other failure, such as an allocation failure, bail. 5169 */ 5170 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5171 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5172 ret = 0; 5173 } 5174 5175 out_unlock: 5176 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5177 mutex_unlock(&dev->struct_mutex); 5178 5179 return ret; 5180 } 5181 5182 void 5183 i915_gem_cleanup_engines(struct drm_device *dev) 5184 { 5185 struct drm_i915_private *dev_priv = dev->dev_private; 5186 struct intel_engine_cs *engine; 5187 5188 for_each_engine(engine, dev_priv) 5189 dev_priv->gt.cleanup_engine(engine); 5190 5191 if (i915.enable_execlists) 5192 /* 5193 * Neither the BIOS, ourselves or any other kernel 5194 * expects the system to be in execlists mode on startup, 5195 * so we need to reset the GPU back to legacy mode. 5196 */ 5197 intel_gpu_reset(dev, ALL_ENGINES); 5198 } 5199 5200 static void 5201 init_engine_lists(struct intel_engine_cs *engine) 5202 { 5203 INIT_LIST_HEAD(&engine->active_list); 5204 INIT_LIST_HEAD(&engine->request_list); 5205 } 5206 5207 void 5208 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 5209 { 5210 struct drm_device *dev = dev_priv->dev; 5211 5212 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 5213 !IS_CHERRYVIEW(dev_priv)) 5214 dev_priv->num_fence_regs = 32; 5215 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 5216 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 5217 dev_priv->num_fence_regs = 16; 5218 else 5219 dev_priv->num_fence_regs = 8; 5220 5221 if (intel_vgpu_active(dev)) 5222 dev_priv->num_fence_regs = 5223 I915_READ(vgtif_reg(avail_rs.fence_num)); 5224 5225 /* Initialize fence registers to zero */ 5226 i915_gem_restore_fences(dev); 5227 5228 i915_gem_detect_bit_6_swizzle(dev); 5229 } 5230 5231 void 5232 i915_gem_load_init(struct drm_device *dev) 5233 { 5234 struct drm_i915_private *dev_priv = dev->dev_private; 5235 int i; 5236 5237 INIT_LIST_HEAD(&dev_priv->vm_list); 5238 INIT_LIST_HEAD(&dev_priv->context_list); 5239 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5240 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5241 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5242 for (i = 0; i < I915_NUM_ENGINES; i++) 5243 init_engine_lists(&dev_priv->engine[i]); 5244 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5245 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5246 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5247 i915_gem_retire_work_handler); 5248 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5249 i915_gem_idle_work_handler); 5250 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5251 5252 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5253 5254 /* 5255 * Set initial sequence number for requests. 5256 * Using this number allows the wraparound to happen early, 5257 * catching any obvious problems. 5258 */ 5259 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5260 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5261 5262 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5263 5264 init_waitqueue_head(&dev_priv->pending_flip_queue); 5265 5266 dev_priv->mm.interruptible = true; 5267 5268 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5269 } 5270 5271 void i915_gem_load_cleanup(struct drm_device *dev) 5272 { 5273 #if 0 5274 struct drm_i915_private *dev_priv = to_i915(dev); 5275 5276 kmem_cache_destroy(dev_priv->requests); 5277 kmem_cache_destroy(dev_priv->vmas); 5278 kmem_cache_destroy(dev_priv->objects); 5279 #endif 5280 } 5281 5282 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5283 { 5284 struct drm_i915_file_private *file_priv = file->driver_priv; 5285 5286 /* Clean up our request list when the client is going away, so that 5287 * later retire_requests won't dereference our soon-to-be-gone 5288 * file_priv. 5289 */ 5290 spin_lock(&file_priv->mm.lock); 5291 while (!list_empty(&file_priv->mm.request_list)) { 5292 struct drm_i915_gem_request *request; 5293 5294 request = list_first_entry(&file_priv->mm.request_list, 5295 struct drm_i915_gem_request, 5296 client_list); 5297 list_del(&request->client_list); 5298 request->file_priv = NULL; 5299 } 5300 spin_unlock(&file_priv->mm.lock); 5301 5302 if (!list_empty(&file_priv->rps.link)) { 5303 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5304 list_del(&file_priv->rps.link); 5305 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5306 } 5307 } 5308 5309 int 5310 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5311 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5312 { 5313 *color = 0; /* XXXKIB */ 5314 return (0); 5315 } 5316 5317 void 5318 i915_gem_pager_dtor(void *handle) 5319 { 5320 struct drm_gem_object *obj; 5321 struct drm_device *dev; 5322 5323 obj = handle; 5324 dev = obj->dev; 5325 5326 mutex_lock(&dev->struct_mutex); 5327 drm_gem_free_mmap_offset(obj); 5328 i915_gem_release_mmap(to_intel_bo(obj)); 5329 drm_gem_object_unreference(obj); 5330 mutex_unlock(&dev->struct_mutex); 5331 } 5332 5333 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5334 { 5335 struct drm_i915_file_private *file_priv; 5336 int ret; 5337 5338 DRM_DEBUG_DRIVER("\n"); 5339 5340 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5341 if (!file_priv) 5342 return -ENOMEM; 5343 5344 file->driver_priv = file_priv; 5345 file_priv->dev_priv = dev->dev_private; 5346 file_priv->file = file; 5347 INIT_LIST_HEAD(&file_priv->rps.link); 5348 5349 spin_init(&file_priv->mm.lock, "i915_priv"); 5350 INIT_LIST_HEAD(&file_priv->mm.request_list); 5351 5352 file_priv->bsd_ring = -1; 5353 5354 ret = i915_gem_context_open(dev, file); 5355 if (ret) 5356 kfree(file_priv); 5357 5358 return ret; 5359 } 5360 5361 /** 5362 * i915_gem_track_fb - update frontbuffer tracking 5363 * @old: current GEM buffer for the frontbuffer slots 5364 * @new: new GEM buffer for the frontbuffer slots 5365 * @frontbuffer_bits: bitmask of frontbuffer slots 5366 * 5367 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5368 * from @old and setting them in @new. Both @old and @new can be NULL. 5369 */ 5370 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5371 struct drm_i915_gem_object *new, 5372 unsigned frontbuffer_bits) 5373 { 5374 if (old) { 5375 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5376 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5377 old->frontbuffer_bits &= ~frontbuffer_bits; 5378 } 5379 5380 if (new) { 5381 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5382 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5383 new->frontbuffer_bits |= frontbuffer_bits; 5384 } 5385 } 5386 5387 /* All the new VM stuff */ 5388 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5389 struct i915_address_space *vm) 5390 { 5391 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5392 struct i915_vma *vma; 5393 5394 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5395 5396 list_for_each_entry(vma, &o->vma_list, obj_link) { 5397 if (vma->is_ggtt && 5398 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5399 continue; 5400 if (vma->vm == vm) 5401 return vma->node.start; 5402 } 5403 5404 WARN(1, "%s vma for this object not found.\n", 5405 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5406 return -1; 5407 } 5408 5409 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5410 const struct i915_ggtt_view *view) 5411 { 5412 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5413 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5414 struct i915_vma *vma; 5415 5416 list_for_each_entry(vma, &o->vma_list, obj_link) 5417 if (vma->vm == &ggtt->base && 5418 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5419 return vma->node.start; 5420 5421 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5422 return -1; 5423 } 5424 5425 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5426 struct i915_address_space *vm) 5427 { 5428 struct i915_vma *vma; 5429 5430 list_for_each_entry(vma, &o->vma_list, obj_link) { 5431 if (vma->is_ggtt && 5432 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5433 continue; 5434 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5435 return true; 5436 } 5437 5438 return false; 5439 } 5440 5441 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5442 const struct i915_ggtt_view *view) 5443 { 5444 struct drm_i915_private *dev_priv = to_i915(o->base.dev); 5445 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5446 struct i915_vma *vma; 5447 5448 list_for_each_entry(vma, &o->vma_list, obj_link) 5449 if (vma->vm == &ggtt->base && 5450 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5451 drm_mm_node_allocated(&vma->node)) 5452 return true; 5453 5454 return false; 5455 } 5456 5457 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5458 { 5459 struct i915_vma *vma; 5460 5461 list_for_each_entry(vma, &o->vma_list, obj_link) 5462 if (drm_mm_node_allocated(&vma->node)) 5463 return true; 5464 5465 return false; 5466 } 5467 5468 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5469 struct i915_address_space *vm) 5470 { 5471 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5472 struct i915_vma *vma; 5473 5474 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5475 5476 BUG_ON(list_empty(&o->vma_list)); 5477 5478 list_for_each_entry(vma, &o->vma_list, obj_link) { 5479 if (vma->is_ggtt && 5480 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5481 continue; 5482 if (vma->vm == vm) 5483 return vma->node.size; 5484 } 5485 return 0; 5486 } 5487 5488 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5489 { 5490 struct i915_vma *vma; 5491 list_for_each_entry(vma, &obj->vma_list, obj_link) 5492 if (vma->pin_count > 0) 5493 return true; 5494 5495 return false; 5496 } 5497 5498 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5499 struct vm_page * 5500 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5501 { 5502 struct vm_page *page; 5503 5504 /* Only default objects have per-page dirty tracking */ 5505 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5506 return NULL; 5507 5508 page = i915_gem_object_get_page(obj, n); 5509 set_page_dirty(page); 5510 return page; 5511 } 5512 5513 /* Allocate a new GEM object and fill it with the supplied data */ 5514 struct drm_i915_gem_object * 5515 i915_gem_object_create_from_data(struct drm_device *dev, 5516 const void *data, size_t size) 5517 { 5518 struct drm_i915_gem_object *obj; 5519 struct sg_table *sg; 5520 size_t bytes; 5521 int ret; 5522 5523 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5524 if (IS_ERR_OR_NULL(obj)) 5525 return obj; 5526 5527 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5528 if (ret) 5529 goto fail; 5530 5531 ret = i915_gem_object_get_pages(obj); 5532 if (ret) 5533 goto fail; 5534 5535 i915_gem_object_pin_pages(obj); 5536 sg = obj->pages; 5537 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size); 5538 obj->dirty = 1; /* Backing store is now out of date */ 5539 i915_gem_object_unpin_pages(obj); 5540 5541 if (WARN_ON(bytes != size)) { 5542 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5543 ret = -EFAULT; 5544 goto fail; 5545 } 5546 5547 return obj; 5548 5549 fail: 5550 drm_gem_object_unreference(&obj->base); 5551 return ERR_PTR(ret); 5552 } 5553