1 /* 2 * Copyright © 2008-2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #include <drm/drmP.h> 29 #include <drm/drm_vma_manager.h> 30 #include <drm/i915_drm.h> 31 #include "i915_drv.h" 32 #include "i915_vgpu.h" 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 #include <linux/shmem_fs.h> 36 #include <linux/slab.h> 37 #include <linux/swap.h> 38 #include <linux/pci.h> 39 40 #define RQ_BUG_ON(expr) 41 42 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 43 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 44 static void 45 i915_gem_object_retire__write(struct drm_i915_gem_object *obj); 46 static void 47 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); 48 49 static bool cpu_cache_is_coherent(struct drm_device *dev, 50 enum i915_cache_level level) 51 { 52 return HAS_LLC(dev) || level != I915_CACHE_NONE; 53 } 54 55 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) 56 { 57 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 58 return true; 59 60 return obj->pin_display; 61 } 62 63 /* some bookkeeping */ 64 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 65 size_t size) 66 { 67 spin_lock(&dev_priv->mm.object_stat_lock); 68 dev_priv->mm.object_count++; 69 dev_priv->mm.object_memory += size; 70 spin_unlock(&dev_priv->mm.object_stat_lock); 71 } 72 73 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 74 size_t size) 75 { 76 spin_lock(&dev_priv->mm.object_stat_lock); 77 dev_priv->mm.object_count--; 78 dev_priv->mm.object_memory -= size; 79 spin_unlock(&dev_priv->mm.object_stat_lock); 80 } 81 82 static int 83 i915_gem_wait_for_error(struct i915_gpu_error *error) 84 { 85 int ret; 86 87 #define EXIT_COND (!i915_reset_in_progress(error) || \ 88 i915_terminally_wedged(error)) 89 if (EXIT_COND) 90 return 0; 91 92 /* 93 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 94 * userspace. If it takes that long something really bad is going on and 95 * we should simply try to bail out and fail as gracefully as possible. 
96 */ 97 ret = wait_event_interruptible_timeout(error->reset_queue, 98 EXIT_COND, 99 10*HZ); 100 if (ret == 0) { 101 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 102 return -EIO; 103 } else if (ret < 0) { 104 return ret; 105 } 106 #undef EXIT_COND 107 108 return 0; 109 } 110 111 int i915_mutex_lock_interruptible(struct drm_device *dev) 112 { 113 struct drm_i915_private *dev_priv = dev->dev_private; 114 int ret; 115 116 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 117 if (ret) 118 return ret; 119 120 ret = mutex_lock_interruptible(&dev->struct_mutex); 121 if (ret) 122 return ret; 123 124 WARN_ON(i915_verify_lists(dev)); 125 return 0; 126 } 127 128 int 129 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 130 struct drm_file *file) 131 { 132 struct drm_i915_private *dev_priv = dev->dev_private; 133 struct drm_i915_gem_get_aperture *args = data; 134 struct i915_gtt *ggtt = &dev_priv->gtt; 135 struct i915_vma *vma; 136 size_t pinned; 137 138 pinned = 0; 139 mutex_lock(&dev->struct_mutex); 140 list_for_each_entry(vma, &ggtt->base.active_list, vm_link) 141 if (vma->pin_count) 142 pinned += vma->node.size; 143 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) 144 if (vma->pin_count) 145 pinned += vma->node.size; 146 mutex_unlock(&dev->struct_mutex); 147 148 args->aper_size = dev_priv->gtt.base.total; 149 args->aper_available_size = args->aper_size - pinned; 150 151 return 0; 152 } 153 154 #if 0 155 static int 156 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 157 { 158 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 159 char *vaddr = obj->phys_handle->vaddr; 160 struct sg_table *st; 161 struct scatterlist *sg; 162 int i; 163 164 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 165 return -EINVAL; 166 167 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 168 struct page *page; 169 char *src; 170 171 page = shmem_read_mapping_page(mapping, i); 172 if (IS_ERR(page)) 173 return PTR_ERR(page); 174 175 src = kmap_atomic(page); 176 memcpy(vaddr, src, PAGE_SIZE); 177 drm_clflush_virt_range(vaddr, PAGE_SIZE); 178 kunmap_atomic(src); 179 180 put_page(page); 181 vaddr += PAGE_SIZE; 182 } 183 184 i915_gem_chipset_flush(obj->base.dev); 185 186 st = kmalloc(sizeof(*st), GFP_KERNEL); 187 if (st == NULL) 188 return -ENOMEM; 189 190 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 191 kfree(st); 192 return -ENOMEM; 193 } 194 195 sg = st->sgl; 196 sg->offset = 0; 197 sg->length = obj->base.size; 198 199 sg_dma_address(sg) = obj->phys_handle->busaddr; 200 sg_dma_len(sg) = obj->base.size; 201 202 obj->pages = st; 203 return 0; 204 } 205 206 static void 207 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 208 { 209 int ret; 210 211 BUG_ON(obj->madv == __I915_MADV_PURGED); 212 213 ret = i915_gem_object_set_to_cpu_domain(obj, true); 214 if (ret) { 215 /* In the event of a disaster, abandon all caches and 216 * hope for the best. 
217 */ 218 WARN_ON(ret != -EIO); 219 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 220 } 221 222 if (obj->madv == I915_MADV_DONTNEED) 223 obj->dirty = 0; 224 225 if (obj->dirty) { 226 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 227 char *vaddr = obj->phys_handle->vaddr; 228 int i; 229 230 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 231 struct page *page; 232 char *dst; 233 234 page = shmem_read_mapping_page(mapping, i); 235 if (IS_ERR(page)) 236 continue; 237 238 dst = kmap_atomic(page); 239 drm_clflush_virt_range(vaddr, PAGE_SIZE); 240 memcpy(dst, vaddr, PAGE_SIZE); 241 kunmap_atomic(dst); 242 243 set_page_dirty(page); 244 if (obj->madv == I915_MADV_WILLNEED) 245 mark_page_accessed(page); 246 put_page(page); 247 vaddr += PAGE_SIZE; 248 } 249 obj->dirty = 0; 250 } 251 252 sg_free_table(obj->pages); 253 kfree(obj->pages); 254 } 255 256 static void 257 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 258 { 259 drm_pci_free(obj->base.dev, obj->phys_handle); 260 } 261 262 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 263 .get_pages = i915_gem_object_get_pages_phys, 264 .put_pages = i915_gem_object_put_pages_phys, 265 .release = i915_gem_object_release_phys, 266 }; 267 #endif 268 269 static int 270 drop_pages(struct drm_i915_gem_object *obj) 271 { 272 struct i915_vma *vma, *next; 273 int ret; 274 275 drm_gem_object_reference(&obj->base); 276 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) 277 if (i915_vma_unbind(vma)) 278 break; 279 280 ret = i915_gem_object_put_pages(obj); 281 drm_gem_object_unreference(&obj->base); 282 283 return ret; 284 } 285 286 int 287 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 288 int align) 289 { 290 drm_dma_handle_t *phys; 291 int ret; 292 293 if (obj->phys_handle) { 294 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 295 return -EBUSY; 296 297 return 0; 298 } 299 300 if (obj->madv != I915_MADV_WILLNEED) 301 return -EFAULT; 302 303 #if 0 304 if (obj->base.filp == NULL) 305 return -EINVAL; 306 #endif 307 308 ret = drop_pages(obj); 309 if (ret) 310 return ret; 311 312 /* create a new object */ 313 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 314 if (!phys) 315 return -ENOMEM; 316 317 obj->phys_handle = phys; 318 #if 0 319 obj->ops = &i915_gem_phys_ops; 320 #endif 321 322 return i915_gem_object_get_pages(obj); 323 } 324 325 static int 326 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 327 struct drm_i915_gem_pwrite *args, 328 struct drm_file *file_priv) 329 { 330 struct drm_device *dev = obj->base.dev; 331 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 332 char __user *user_data = to_user_ptr(args->data_ptr); 333 int ret = 0; 334 335 /* We manually control the domain here and pretend that it 336 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 337 */ 338 ret = i915_gem_object_wait_rendering(obj, false); 339 if (ret) 340 return ret; 341 342 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 343 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 344 unsigned long unwritten; 345 346 /* The physical object once assigned is fixed for the lifetime 347 * of the obj, so we can safely drop the lock and continue 348 * to access vaddr. 
349 */ 350 mutex_unlock(&dev->struct_mutex); 351 unwritten = copy_from_user(vaddr, user_data, args->size); 352 mutex_lock(&dev->struct_mutex); 353 if (unwritten) { 354 ret = -EFAULT; 355 goto out; 356 } 357 } 358 359 drm_clflush_virt_range(vaddr, args->size); 360 i915_gem_chipset_flush(dev); 361 362 out: 363 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 364 return ret; 365 } 366 367 void *i915_gem_object_alloc(struct drm_device *dev) 368 { 369 return kmalloc(sizeof(struct drm_i915_gem_object), 370 M_DRM, M_WAITOK | M_ZERO); 371 } 372 373 void i915_gem_object_free(struct drm_i915_gem_object *obj) 374 { 375 kfree(obj); 376 } 377 378 static int 379 i915_gem_create(struct drm_file *file, 380 struct drm_device *dev, 381 uint64_t size, 382 uint32_t *handle_p) 383 { 384 struct drm_i915_gem_object *obj; 385 int ret; 386 u32 handle; 387 388 size = roundup(size, PAGE_SIZE); 389 if (size == 0) 390 return -EINVAL; 391 392 /* Allocate the new object */ 393 obj = i915_gem_alloc_object(dev, size); 394 if (obj == NULL) 395 return -ENOMEM; 396 397 ret = drm_gem_handle_create(file, &obj->base, &handle); 398 /* drop reference from allocate - handle holds it now */ 399 drm_gem_object_unreference_unlocked(&obj->base); 400 if (ret) 401 return ret; 402 403 *handle_p = handle; 404 return 0; 405 } 406 407 int 408 i915_gem_dumb_create(struct drm_file *file, 409 struct drm_device *dev, 410 struct drm_mode_create_dumb *args) 411 { 412 /* have to work out size/pitch and return them */ 413 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 414 args->size = args->pitch * args->height; 415 return i915_gem_create(file, dev, 416 args->size, &args->handle); 417 } 418 419 /** 420 * Creates a new mm object and returns a handle to it. 421 */ 422 int 423 i915_gem_create_ioctl(struct drm_device *dev, void *data, 424 struct drm_file *file) 425 { 426 struct drm_i915_gem_create *args = data; 427 428 return i915_gem_create(file, dev, 429 args->size, &args->handle); 430 } 431 432 static inline int 433 __copy_to_user_swizzled(char __user *cpu_vaddr, 434 const char *gpu_vaddr, int gpu_offset, 435 int length) 436 { 437 int ret, cpu_offset = 0; 438 439 while (length > 0) { 440 int cacheline_end = ALIGN(gpu_offset + 1, 64); 441 int this_length = min(cacheline_end - gpu_offset, length); 442 int swizzled_gpu_offset = gpu_offset ^ 64; 443 444 ret = __copy_to_user(cpu_vaddr + cpu_offset, 445 gpu_vaddr + swizzled_gpu_offset, 446 this_length); 447 if (ret) 448 return ret + length; 449 450 cpu_offset += this_length; 451 gpu_offset += this_length; 452 length -= this_length; 453 } 454 455 return 0; 456 } 457 458 static inline int 459 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 460 const char __user *cpu_vaddr, 461 int length) 462 { 463 int ret, cpu_offset = 0; 464 465 while (length > 0) { 466 int cacheline_end = ALIGN(gpu_offset + 1, 64); 467 int this_length = min(cacheline_end - gpu_offset, length); 468 int swizzled_gpu_offset = gpu_offset ^ 64; 469 470 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 471 cpu_vaddr + cpu_offset, 472 this_length); 473 if (ret) 474 return ret + length; 475 476 cpu_offset += this_length; 477 gpu_offset += this_length; 478 length -= this_length; 479 } 480 481 return 0; 482 } 483 484 /* 485 * Pins the specified object's pages and synchronizes the object with 486 * GPU accesses. Sets needs_clflush to non-zero if the caller should 487 * flush the object from the CPU cache. 
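 *
 * On success the object's pages are left pinned; the caller is expected to
 * balance this with i915_gem_object_unpin_pages() once the copy has
 * completed, as i915_gem_shmem_pread() below does.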
488 */ 489 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 490 int *needs_clflush) 491 { 492 int ret; 493 494 *needs_clflush = 0; 495 496 #if 0 497 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 498 return -EINVAL; 499 #endif 500 501 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 502 /* If we're not in the cpu read domain, set ourself into the gtt 503 * read domain and manually flush cachelines (if required). This 504 * optimizes for the case when the gpu will dirty the data 505 * anyway again before the next pread happens. */ 506 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 507 obj->cache_level); 508 ret = i915_gem_object_wait_rendering(obj, true); 509 if (ret) 510 return ret; 511 } 512 513 ret = i915_gem_object_get_pages(obj); 514 if (ret) 515 return ret; 516 517 i915_gem_object_pin_pages(obj); 518 519 return ret; 520 } 521 522 /* Per-page copy function for the shmem pread fastpath. 523 * Flushes invalid cachelines before reading the target if 524 * needs_clflush is set. */ 525 static int 526 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 527 char __user *user_data, 528 bool page_do_bit17_swizzling, bool needs_clflush) 529 { 530 char *vaddr; 531 int ret; 532 533 if (unlikely(page_do_bit17_swizzling)) 534 return -EINVAL; 535 536 vaddr = kmap_atomic(page); 537 if (needs_clflush) 538 drm_clflush_virt_range(vaddr + shmem_page_offset, 539 page_length); 540 ret = __copy_to_user_inatomic(user_data, 541 vaddr + shmem_page_offset, 542 page_length); 543 kunmap_atomic(vaddr); 544 545 return ret ? -EFAULT : 0; 546 } 547 548 static void 549 shmem_clflush_swizzled_range(char *addr, unsigned long length, 550 bool swizzled) 551 { 552 if (unlikely(swizzled)) { 553 unsigned long start = (unsigned long) addr; 554 unsigned long end = (unsigned long) addr + length; 555 556 /* For swizzling simply ensure that we always flush both 557 * channels. Lame, but simple and it works. Swizzled 558 * pwrite/pread is far from a hotpath - current userspace 559 * doesn't use it at all. */ 560 start = round_down(start, 128); 561 end = round_up(end, 128); 562 563 drm_clflush_virt_range((void *)start, end - start); 564 } else { 565 drm_clflush_virt_range(addr, length); 566 } 567 568 } 569 570 /* Only difference to the fast-path function is that this can handle bit17 571 * and uses non-atomic copy and kmap functions. */ 572 static int 573 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 574 char __user *user_data, 575 bool page_do_bit17_swizzling, bool needs_clflush) 576 { 577 char *vaddr; 578 int ret; 579 580 vaddr = kmap(page); 581 if (needs_clflush) 582 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 583 page_length, 584 page_do_bit17_swizzling); 585 586 if (page_do_bit17_swizzling) 587 ret = __copy_to_user_swizzled(user_data, 588 vaddr, shmem_page_offset, 589 page_length); 590 else 591 ret = __copy_to_user(user_data, 592 vaddr + shmem_page_offset, 593 page_length); 594 kunmap(page); 595 596 return ret ? 
- EFAULT : 0; 597 } 598 599 static int 600 i915_gem_shmem_pread(struct drm_device *dev, 601 struct drm_i915_gem_object *obj, 602 struct drm_i915_gem_pread *args, 603 struct drm_file *file) 604 { 605 char __user *user_data; 606 ssize_t remain; 607 loff_t offset; 608 int shmem_page_offset, page_length, ret = 0; 609 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 610 int prefaulted = 0; 611 int needs_clflush = 0; 612 struct sg_page_iter sg_iter; 613 614 user_data = to_user_ptr(args->data_ptr); 615 remain = args->size; 616 617 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 618 619 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 620 if (ret) 621 return ret; 622 623 offset = args->offset; 624 625 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 626 offset >> PAGE_SHIFT) { 627 struct vm_page *page = sg_page_iter_page(&sg_iter); 628 629 if (remain <= 0) 630 break; 631 632 /* Operation in this page 633 * 634 * shmem_page_offset = offset within page in shmem file 635 * page_length = bytes to copy for this page 636 */ 637 shmem_page_offset = offset_in_page(offset); 638 page_length = remain; 639 if ((shmem_page_offset + page_length) > PAGE_SIZE) 640 page_length = PAGE_SIZE - shmem_page_offset; 641 642 page_do_bit17_swizzling = obj_do_bit17_swizzling && 643 (page_to_phys(page) & (1 << 17)) != 0; 644 645 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 646 user_data, page_do_bit17_swizzling, 647 needs_clflush); 648 if (ret == 0) 649 goto next_page; 650 651 mutex_unlock(&dev->struct_mutex); 652 653 if (likely(!i915.prefault_disable) && !prefaulted) { 654 ret = fault_in_multipages_writeable(user_data, remain); 655 /* Userspace is tricking us, but we've already clobbered 656 * its pages with the prefault and promised to write the 657 * data up to the first fault. Hence ignore any errors 658 * and just continue. */ 659 (void)ret; 660 prefaulted = 1; 661 } 662 663 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 664 user_data, page_do_bit17_swizzling, 665 needs_clflush); 666 667 mutex_lock(&dev->struct_mutex); 668 669 if (ret) 670 goto out; 671 672 next_page: 673 remain -= page_length; 674 user_data += page_length; 675 offset += page_length; 676 } 677 678 out: 679 i915_gem_object_unpin_pages(obj); 680 681 return ret; 682 } 683 684 /** 685 * Reads data from the object referenced by handle. 686 * 687 * On error, the contents of *data are undefined. 688 */ 689 int 690 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 691 struct drm_file *file) 692 { 693 struct drm_i915_gem_pread *args = data; 694 struct drm_i915_gem_object *obj; 695 int ret = 0; 696 697 if (args->size == 0) 698 return 0; 699 700 ret = i915_mutex_lock_interruptible(dev); 701 if (ret) 702 return ret; 703 704 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 705 if (&obj->base == NULL) { 706 ret = -ENOENT; 707 goto unlock; 708 } 709 710 /* Bounds check source. */ 711 if (args->offset > obj->base.size || 712 args->size > obj->base.size - args->offset) { 713 ret = -EINVAL; 714 goto out; 715 } 716 717 /* prime objects have no backing filp to GEM pread/pwrite 718 * pages from. 
719 */ 720 721 trace_i915_gem_object_pread(obj, args->offset, args->size); 722 723 ret = i915_gem_shmem_pread(dev, obj, args, file); 724 725 out: 726 drm_gem_object_unreference(&obj->base); 727 unlock: 728 mutex_unlock(&dev->struct_mutex); 729 return ret; 730 } 731 732 /* This is the fast write path which cannot handle 733 * page faults in the source data 734 */ 735 736 static inline int 737 fast_user_write(struct io_mapping *mapping, 738 loff_t page_base, int page_offset, 739 char __user *user_data, 740 int length) 741 { 742 void __iomem *vaddr_atomic; 743 void *vaddr; 744 unsigned long unwritten; 745 746 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 747 /* We can use the cpu mem copy function because this is X86. */ 748 vaddr = (char __force*)vaddr_atomic + page_offset; 749 unwritten = __copy_from_user_inatomic_nocache(vaddr, 750 user_data, length); 751 io_mapping_unmap_atomic(vaddr_atomic); 752 return unwritten; 753 } 754 755 /** 756 * This is the fast pwrite path, where we copy the data directly from the 757 * user into the GTT, uncached. 758 */ 759 static int 760 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 761 struct drm_i915_gem_object *obj, 762 struct drm_i915_gem_pwrite *args, 763 struct drm_file *file) 764 { 765 struct drm_i915_private *dev_priv = dev->dev_private; 766 ssize_t remain; 767 loff_t offset, page_base; 768 char __user *user_data; 769 int page_offset, page_length, ret; 770 771 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 772 if (ret) 773 goto out; 774 775 ret = i915_gem_object_set_to_gtt_domain(obj, true); 776 if (ret) 777 goto out_unpin; 778 779 ret = i915_gem_object_put_fence(obj); 780 if (ret) 781 goto out_unpin; 782 783 user_data = to_user_ptr(args->data_ptr); 784 remain = args->size; 785 786 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 787 788 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 789 790 while (remain > 0) { 791 /* Operation in this page 792 * 793 * page_base = page offset within aperture 794 * page_offset = offset within page 795 * page_length = bytes to copy for this page 796 */ 797 page_base = offset & ~PAGE_MASK; 798 page_offset = offset_in_page(offset); 799 page_length = remain; 800 if ((page_offset + remain) > PAGE_SIZE) 801 page_length = PAGE_SIZE - page_offset; 802 803 /* If we get a fault while copying data, then (presumably) our 804 * source page isn't available. Return the error and we'll 805 * retry in the slow path. 806 */ 807 if (fast_user_write(dev_priv->gtt.mappable, page_base, 808 page_offset, user_data, page_length)) { 809 ret = -EFAULT; 810 goto out_flush; 811 } 812 813 remain -= page_length; 814 user_data += page_length; 815 offset += page_length; 816 } 817 818 out_flush: 819 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 820 out_unpin: 821 i915_gem_object_ggtt_unpin(obj); 822 out: 823 return ret; 824 } 825 826 /* Per-page copy function for the shmem pwrite fastpath. 827 * Flushes invalid cachelines before writing to the target if 828 * needs_clflush_before is set and flushes out any written cachelines after 829 * writing if needs_clflush is set. 
*/ 830 static int 831 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 832 char __user *user_data, 833 bool page_do_bit17_swizzling, 834 bool needs_clflush_before, 835 bool needs_clflush_after) 836 { 837 char *vaddr; 838 int ret; 839 840 if (unlikely(page_do_bit17_swizzling)) 841 return -EINVAL; 842 843 vaddr = kmap_atomic(page); 844 if (needs_clflush_before) 845 drm_clflush_virt_range(vaddr + shmem_page_offset, 846 page_length); 847 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 848 user_data, page_length); 849 if (needs_clflush_after) 850 drm_clflush_virt_range(vaddr + shmem_page_offset, 851 page_length); 852 kunmap_atomic(vaddr); 853 854 return ret ? -EFAULT : 0; 855 } 856 857 /* Only difference to the fast-path function is that this can handle bit17 858 * and uses non-atomic copy and kmap functions. */ 859 static int 860 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 861 char __user *user_data, 862 bool page_do_bit17_swizzling, 863 bool needs_clflush_before, 864 bool needs_clflush_after) 865 { 866 char *vaddr; 867 int ret; 868 869 vaddr = kmap(page); 870 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 871 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 872 page_length, 873 page_do_bit17_swizzling); 874 if (page_do_bit17_swizzling) 875 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 876 user_data, 877 page_length); 878 else 879 ret = __copy_from_user(vaddr + shmem_page_offset, 880 user_data, 881 page_length); 882 if (needs_clflush_after) 883 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 884 page_length, 885 page_do_bit17_swizzling); 886 kunmap(page); 887 888 return ret ? -EFAULT : 0; 889 } 890 891 static int 892 i915_gem_shmem_pwrite(struct drm_device *dev, 893 struct drm_i915_gem_object *obj, 894 struct drm_i915_gem_pwrite *args, 895 struct drm_file *file) 896 { 897 ssize_t remain; 898 loff_t offset; 899 char __user *user_data; 900 int shmem_page_offset, page_length, ret = 0; 901 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 902 int hit_slowpath = 0; 903 int needs_clflush_after = 0; 904 int needs_clflush_before = 0; 905 struct sg_page_iter sg_iter; 906 907 user_data = to_user_ptr(args->data_ptr); 908 remain = args->size; 909 910 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 911 912 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 913 /* If we're not in the cpu write domain, set ourself into the gtt 914 * write domain and manually flush cachelines (if required). This 915 * optimizes for the case when the gpu will use the data 916 * right away and we therefore have to clflush anyway. */ 917 needs_clflush_after = cpu_write_needs_clflush(obj); 918 ret = i915_gem_object_wait_rendering(obj, false); 919 if (ret) 920 return ret; 921 } 922 /* Same trick applies to invalidate partially written cachelines read 923 * before writing. 
*/ 924 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 925 needs_clflush_before = 926 !cpu_cache_is_coherent(dev, obj->cache_level); 927 928 ret = i915_gem_object_get_pages(obj); 929 if (ret) 930 return ret; 931 932 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 933 934 i915_gem_object_pin_pages(obj); 935 936 offset = args->offset; 937 obj->dirty = 1; 938 939 VM_OBJECT_LOCK(obj->base.vm_obj); 940 vm_object_pip_add(obj->base.vm_obj, 1); 941 942 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 943 offset >> PAGE_SHIFT) { 944 struct vm_page *page = sg_page_iter_page(&sg_iter); 945 int partial_cacheline_write; 946 947 if (remain <= 0) 948 break; 949 950 /* Operation in this page 951 * 952 * shmem_page_offset = offset within page in shmem file 953 * page_length = bytes to copy for this page 954 */ 955 shmem_page_offset = offset_in_page(offset); 956 957 page_length = remain; 958 if ((shmem_page_offset + page_length) > PAGE_SIZE) 959 page_length = PAGE_SIZE - shmem_page_offset; 960 961 /* If we don't overwrite a cacheline completely we need to be 962 * careful to have up-to-date data by first clflushing. Don't 963 * overcomplicate things and flush the entire patch. */ 964 partial_cacheline_write = needs_clflush_before && 965 ((shmem_page_offset | page_length) 966 & (cpu_clflush_line_size - 1)); 967 968 page_do_bit17_swizzling = obj_do_bit17_swizzling && 969 (page_to_phys(page) & (1 << 17)) != 0; 970 971 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 972 user_data, page_do_bit17_swizzling, 973 partial_cacheline_write, 974 needs_clflush_after); 975 if (ret == 0) 976 goto next_page; 977 978 hit_slowpath = 1; 979 mutex_unlock(&dev->struct_mutex); 980 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 981 user_data, page_do_bit17_swizzling, 982 partial_cacheline_write, 983 needs_clflush_after); 984 985 mutex_lock(&dev->struct_mutex); 986 987 if (ret) 988 goto out; 989 990 next_page: 991 remain -= page_length; 992 user_data += page_length; 993 offset += page_length; 994 } 995 vm_object_pip_wakeup(obj->base.vm_obj); 996 VM_OBJECT_UNLOCK(obj->base.vm_obj); 997 998 out: 999 i915_gem_object_unpin_pages(obj); 1000 1001 if (hit_slowpath) { 1002 /* 1003 * Fixup: Flush cpu caches in case we didn't flush the dirty 1004 * cachelines in-line while writing and the object moved 1005 * out of the cpu write domain while we've dropped the lock. 1006 */ 1007 if (!needs_clflush_after && 1008 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1009 if (i915_gem_clflush_object(obj, obj->pin_display)) 1010 needs_clflush_after = true; 1011 } 1012 } 1013 1014 if (needs_clflush_after) 1015 i915_gem_chipset_flush(dev); 1016 else 1017 obj->cache_dirty = true; 1018 1019 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1020 return ret; 1021 } 1022 1023 /** 1024 * Writes data to the object referenced by handle. 1025 * 1026 * On error, the contents of the buffer that were to be modified are undefined. 
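 *
 * The write is serviced by one of three paths: the GTT pwrite fast path for
 * untiled objects outside the CPU write domain that need a clflush anyway,
 * the phys-object path when a physical handle has been attached, and the
 * shmem pwrite path otherwise.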
1027 */ 1028 int 1029 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1030 struct drm_file *file) 1031 { 1032 struct drm_i915_private *dev_priv = dev->dev_private; 1033 struct drm_i915_gem_pwrite *args = data; 1034 struct drm_i915_gem_object *obj; 1035 int ret; 1036 1037 if (args->size == 0) 1038 return 0; 1039 1040 if (likely(!i915.prefault_disable)) { 1041 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1042 args->size); 1043 if (ret) 1044 return -EFAULT; 1045 } 1046 1047 intel_runtime_pm_get(dev_priv); 1048 1049 ret = i915_mutex_lock_interruptible(dev); 1050 if (ret) 1051 goto put_rpm; 1052 1053 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1054 if (&obj->base == NULL) { 1055 ret = -ENOENT; 1056 goto unlock; 1057 } 1058 1059 /* Bounds check destination. */ 1060 if (args->offset > obj->base.size || 1061 args->size > obj->base.size - args->offset) { 1062 ret = -EINVAL; 1063 goto out; 1064 } 1065 1066 /* prime objects have no backing filp to GEM pread/pwrite 1067 * pages from. 1068 */ 1069 1070 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1071 1072 ret = -EFAULT; 1073 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1074 * it would end up going through the fenced access, and we'll get 1075 * different detiling behavior between reading and writing. 1076 * pread/pwrite currently are reading and writing from the CPU 1077 * perspective, requiring manual detiling by the client. 1078 */ 1079 if (obj->tiling_mode == I915_TILING_NONE && 1080 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1081 cpu_write_needs_clflush(obj)) { 1082 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1083 /* Note that the gtt paths might fail with non-page-backed user 1084 * pointers (e.g. gtt mappings when moving data between 1085 * textures). Fallback to the shmem path in that case. */ 1086 } 1087 1088 if (ret == -EFAULT || ret == -ENOSPC) { 1089 if (obj->phys_handle) 1090 ret = i915_gem_phys_pwrite(obj, args, file); 1091 else 1092 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1093 } 1094 1095 out: 1096 drm_gem_object_unreference(&obj->base); 1097 unlock: 1098 mutex_unlock(&dev->struct_mutex); 1099 put_rpm: 1100 intel_runtime_pm_put(dev_priv); 1101 1102 return ret; 1103 } 1104 1105 int 1106 i915_gem_check_wedge(struct i915_gpu_error *error, 1107 bool interruptible) 1108 { 1109 if (i915_reset_in_progress(error)) { 1110 /* Non-interruptible callers can't handle -EAGAIN, hence return 1111 * -EIO unconditionally for these. */ 1112 if (!interruptible) 1113 return -EIO; 1114 1115 /* Recovery complete, but the reset failed ... */ 1116 if (i915_terminally_wedged(error)) 1117 return -EIO; 1118 1119 /* 1120 * Check if GPU Reset is in progress - we need intel_ring_begin 1121 * to work properly to reinit the hw state while the gpu is 1122 * still marked as reset-in-progress. Handle this with a flag. 1123 */ 1124 if (!error->reload_in_reset) 1125 return -EAGAIN; 1126 } 1127 1128 return 0; 1129 } 1130 1131 static void fake_irq(unsigned long data) 1132 { 1133 wakeup_one((void *)data); 1134 } 1135 1136 static bool missed_irq(struct drm_i915_private *dev_priv, 1137 struct intel_engine_cs *ring) 1138 { 1139 return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); 1140 } 1141 1142 #if 0 1143 static int __i915_spin_request(struct drm_i915_gem_request *req, int state) 1144 { 1145 unsigned long timeout; 1146 unsigned cpu; 1147 1148 /* When waiting for high frequency requests, e.g. 
during synchronous
 * rendering split between the CPU and GPU, the finite amount of time
 * required to set up the irq and wait upon it limits the response
 * rate. By busywaiting on the request completion for a short while we
 * can service the high frequency waits as quickly as possible. However,
 * if it is a slow request, we want to sleep as quickly as possible.
 * The tradeoff between waiting and sleeping is roughly the time it
 * takes to sleep on a request, on the order of a microsecond.
 */

        if (req->ring->irq_refcount)
                return -EBUSY;

        /* Only spin if we know the GPU is processing this request */
        if (!i915_gem_request_started(req, true))
                return -EAGAIN;

        timeout = local_clock_us(&cpu) + 5;
        while (!need_resched()) {
                if (i915_gem_request_completed(req, true))
                        return 0;

                if (signal_pending_state(state, current))
                        break;

                if (busywait_stop(timeout, cpu))
                        break;

                cpu_relax_lowlatency();
        }

        if (i915_gem_request_completed(req, false))
                return 0;

        return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns
 * the errno with the remaining time filled in the timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
                        unsigned reset_counter,
                        bool interruptible,
                        s64 *timeout,
                        struct intel_rps_client *rps)
{
        struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
        struct drm_device *dev = ring->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const bool irq_test_in_progress =
                ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
        unsigned long timeout_expire;
        s64 before = 0; /* Only to silence a compiler warning. */
        int ret, sl_timeout = 1;

        WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

        if (list_empty(&req->list))
                return 0;

        if (i915_gem_request_completed(req, true))
                return 0;

        timeout_expire = 0;
        if (timeout) {
                if (WARN_ON(*timeout < 0))
                        return -EINVAL;

                if (*timeout == 0)
                        return -ETIME;

                timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);

                /*
                 * Record current time in case interrupted by signal, or wedged.
                 */
                before = ktime_get_raw_ns();
        }

        if (INTEL_INFO(dev_priv)->gen >= 6)
                gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

        trace_i915_gem_request_wait_begin(req);

        /* Optimistic spin for the next jiffy before touching IRQs */
#if 0
        ret = __i915_spin_request(req);
        if (ret == 0)
                goto out;
#endif

        if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
                ret = -ENODEV;
                goto out;
        }

        lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
        for (;;) {
                struct timer_list timer;

                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
                if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
                        /* ... but upgrade the -EAGAIN to an -EIO if the gpu
                         * is truly gone. */
                        ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
                        if (ret == 0)
                                ret = -EAGAIN;
                        break;
                }

                if (i915_gem_request_completed(req, false)) {
                        ret = 0;
                        break;
                }

                if (interruptible && signal_pending(curthread->td_lwp)) {
                        ret = -ERESTARTSYS;
                        break;
                }

                if (timeout && time_after_eq(jiffies, timeout_expire)) {
                        ret = -ETIME;
                        break;
                }

                timer.function = NULL;
                if (timeout || missed_irq(dev_priv, ring)) {
                        unsigned long expire;

                        setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
                        expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
                        sl_timeout = expire - jiffies;
                        if (sl_timeout < 1)
                                sl_timeout = 1;
                        mod_timer(&timer, expire);
                }

#if 0
                io_schedule();
#endif

                if (timer.function) {
                        del_singleshot_timer_sync(&timer);
                        destroy_timer_on_stack(&timer);
                }

                lksleep(&ring->irq_queue, &ring->irq_queue.lock,
                        interruptible ? PCATCH : 0, "lwe", sl_timeout);
        }
        lockmgr(&ring->irq_queue.lock, LK_RELEASE);
        if (!irq_test_in_progress)
                ring->irq_put(ring);

out:
        trace_i915_gem_request_wait_end(req);

        if (timeout) {
                s64 tres = *timeout - (ktime_get_raw_ns() - before);

                *timeout = tres < 0 ? 0 : tres;

                /*
                 * Apparently ktime isn't accurate enough and occasionally has a
                 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
                 * things up to make the test happy. We allow up to 1 jiffy.
                 *
                 * This is a regression from the timespec->ktime conversion.
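                 *
                 * The slack is one jiffy expressed in nanoseconds,
                 * jiffies_to_usecs(1) * 1000; with HZ=100, for example, up to
                 * 10 ms of apparent overshoot is forgiven below.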
1331 */ 1332 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1333 *timeout = 0; 1334 } 1335 1336 return ret; 1337 } 1338 1339 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, 1340 struct drm_file *file) 1341 { 1342 struct drm_i915_private *dev_private; 1343 struct drm_i915_file_private *file_priv; 1344 1345 WARN_ON(!req || !file || req->file_priv); 1346 1347 if (!req || !file) 1348 return -EINVAL; 1349 1350 if (req->file_priv) 1351 return -EINVAL; 1352 1353 dev_private = req->ring->dev->dev_private; 1354 file_priv = file->driver_priv; 1355 1356 spin_lock(&file_priv->mm.lock); 1357 req->file_priv = file_priv; 1358 list_add_tail(&req->client_list, &file_priv->mm.request_list); 1359 spin_unlock(&file_priv->mm.lock); 1360 1361 req->pid = curproc->p_pid; 1362 1363 return 0; 1364 } 1365 1366 static inline void 1367 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1368 { 1369 struct drm_i915_file_private *file_priv = request->file_priv; 1370 1371 if (!file_priv) 1372 return; 1373 1374 spin_lock(&file_priv->mm.lock); 1375 list_del(&request->client_list); 1376 request->file_priv = NULL; 1377 spin_unlock(&file_priv->mm.lock); 1378 1379 #if 0 1380 put_pid(request->pid); 1381 request->pid = NULL; 1382 #endif 1383 } 1384 1385 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1386 { 1387 trace_i915_gem_request_retire(request); 1388 1389 /* We know the GPU must have read the request to have 1390 * sent us the seqno + interrupt, so use the position 1391 * of tail of the request to update the last known position 1392 * of the GPU head. 1393 * 1394 * Note this requires that we are always called in request 1395 * completion order. 1396 */ 1397 request->ringbuf->last_retired_head = request->postfix; 1398 1399 list_del_init(&request->list); 1400 i915_gem_request_remove_from_client(request); 1401 1402 i915_gem_request_unreference(request); 1403 } 1404 1405 static void 1406 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1407 { 1408 struct intel_engine_cs *engine = req->ring; 1409 struct drm_i915_gem_request *tmp; 1410 1411 lockdep_assert_held(&engine->dev->struct_mutex); 1412 1413 if (list_empty(&req->list)) 1414 return; 1415 1416 do { 1417 tmp = list_first_entry(&engine->request_list, 1418 typeof(*tmp), list); 1419 1420 i915_gem_request_retire(tmp); 1421 } while (tmp != req); 1422 1423 WARN_ON(i915_verify_lists(engine->dev)); 1424 } 1425 1426 /** 1427 * Waits for a request to be signaled, and cleans up the 1428 * request and object lists appropriately for that event. 1429 */ 1430 int 1431 i915_wait_request(struct drm_i915_gem_request *req) 1432 { 1433 struct drm_device *dev; 1434 struct drm_i915_private *dev_priv; 1435 bool interruptible; 1436 int ret; 1437 1438 BUG_ON(req == NULL); 1439 1440 dev = req->ring->dev; 1441 dev_priv = dev->dev_private; 1442 interruptible = dev_priv->mm.interruptible; 1443 1444 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1445 1446 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1447 if (ret) 1448 return ret; 1449 1450 ret = __i915_wait_request(req, 1451 atomic_read(&dev_priv->gpu_error.reset_counter), 1452 interruptible, NULL, NULL); 1453 if (ret) 1454 return ret; 1455 1456 __i915_gem_request_retire__upto(req); 1457 return 0; 1458 } 1459 1460 /** 1461 * Ensures that all rendering to the object has completed and the object is 1462 * safe to unbind from the GTT or access from the CPU. 
1463 */ 1464 int 1465 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1466 bool readonly) 1467 { 1468 int ret, i; 1469 1470 if (!obj->active) 1471 return 0; 1472 1473 if (readonly) { 1474 if (obj->last_write_req != NULL) { 1475 ret = i915_wait_request(obj->last_write_req); 1476 if (ret) 1477 return ret; 1478 1479 i = obj->last_write_req->ring->id; 1480 if (obj->last_read_req[i] == obj->last_write_req) 1481 i915_gem_object_retire__read(obj, i); 1482 else 1483 i915_gem_object_retire__write(obj); 1484 } 1485 } else { 1486 for (i = 0; i < I915_NUM_RINGS; i++) { 1487 if (obj->last_read_req[i] == NULL) 1488 continue; 1489 1490 ret = i915_wait_request(obj->last_read_req[i]); 1491 if (ret) 1492 return ret; 1493 1494 i915_gem_object_retire__read(obj, i); 1495 } 1496 RQ_BUG_ON(obj->active); 1497 } 1498 1499 return 0; 1500 } 1501 1502 static void 1503 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1504 struct drm_i915_gem_request *req) 1505 { 1506 int ring = req->ring->id; 1507 1508 if (obj->last_read_req[ring] == req) 1509 i915_gem_object_retire__read(obj, ring); 1510 else if (obj->last_write_req == req) 1511 i915_gem_object_retire__write(obj); 1512 1513 __i915_gem_request_retire__upto(req); 1514 } 1515 1516 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1517 * as the object state may change during this call. 1518 */ 1519 static __must_check int 1520 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1521 struct intel_rps_client *rps, 1522 bool readonly) 1523 { 1524 struct drm_device *dev = obj->base.dev; 1525 struct drm_i915_private *dev_priv = dev->dev_private; 1526 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1527 unsigned reset_counter; 1528 int ret, i, n = 0; 1529 1530 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1531 BUG_ON(!dev_priv->mm.interruptible); 1532 1533 if (!obj->active) 1534 return 0; 1535 1536 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1537 if (ret) 1538 return ret; 1539 1540 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1541 1542 if (readonly) { 1543 struct drm_i915_gem_request *req; 1544 1545 req = obj->last_write_req; 1546 if (req == NULL) 1547 return 0; 1548 1549 requests[n++] = i915_gem_request_reference(req); 1550 } else { 1551 for (i = 0; i < I915_NUM_RINGS; i++) { 1552 struct drm_i915_gem_request *req; 1553 1554 req = obj->last_read_req[i]; 1555 if (req == NULL) 1556 continue; 1557 1558 requests[n++] = i915_gem_request_reference(req); 1559 } 1560 } 1561 1562 mutex_unlock(&dev->struct_mutex); 1563 for (i = 0; ret == 0 && i < n; i++) 1564 ret = __i915_wait_request(requests[i], reset_counter, true, 1565 NULL, rps); 1566 mutex_lock(&dev->struct_mutex); 1567 1568 for (i = 0; i < n; i++) { 1569 if (ret == 0) 1570 i915_gem_object_retire_request(obj, requests[i]); 1571 i915_gem_request_unreference(requests[i]); 1572 } 1573 1574 return ret; 1575 } 1576 1577 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1578 { 1579 struct drm_i915_file_private *fpriv = file->driver_priv; 1580 return &fpriv->rps; 1581 } 1582 1583 /** 1584 * Called when user space prepares to use an object with the CPU, either 1585 * through the mmap ioctl's mapping or a GTT mapping. 
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file)
{
        struct drm_i915_gem_set_domain *args = data;
        struct drm_i915_gem_object *obj;
        uint32_t read_domains = args->read_domains;
        uint32_t write_domain = args->write_domain;
        int ret;

        /* Only handle setting domains to types used by the CPU. */
        if (write_domain & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        if (read_domains & I915_GEM_GPU_DOMAINS)
                return -EINVAL;

        /* Having something in the write domain implies it's in the read
         * domain, and only that read domain. Enforce that in the request.
         */
        if (write_domain != 0 && read_domains != write_domain)
                return -EINVAL;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Try to flush the object off the GPU without holding the lock.
         * We will repeat the flush holding the lock in the normal manner
         * to catch cases where we are gazumped.
         */
        ret = i915_gem_object_wait_rendering__nonblocking(obj,
                                                          to_rps_client(file),
                                                          !write_domain);
        if (ret)
                goto unref;

        if (read_domains & I915_GEM_DOMAIN_GTT)
                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
        else
                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

        if (write_domain != 0)
                intel_fb_obj_invalidate(obj,
                                        write_domain == I915_GEM_DOMAIN_GTT ?
                                        ORIGIN_GTT : ORIGIN_CPU);

unref:
        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file)
{
        struct drm_i915_gem_sw_finish *args = data;
        struct drm_i915_gem_object *obj;
        int ret = 0;

        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;

        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
        if (&obj->base == NULL) {
                ret = -ENOENT;
                goto unlock;
        }

        /* Pinned buffers may be scanout, so flush the cache */
        if (obj->pin_display)
                i915_gem_object_flush_cpu_write_domain(obj);

        drm_gem_object_unreference(&obj->base);
unlock:
        mutex_unlock(&dev->struct_mutex);
        return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like this. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on; hiding
 * the mmap call in a driver-private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
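 *
 * Roughly, the preferred flow from userspace looks like the following
 * (illustrative sketch only; ioctl and structure names as provided by
 * i915_drm.h and libdrm):
 *
 *      struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *      drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *      ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                 fd, arg.offset);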
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
                    struct drm_file *file)
{
        struct drm_i915_gem_mmap *args = data;
        struct drm_gem_object *obj;
        unsigned long addr;

        struct proc *p = curproc;
        vm_map_t map = &p->p_vmspace->vm_map;
        vm_size_t size;
        int error = 0, rv;

        if (args->flags & ~(I915_MMAP_WC))
                return -EINVAL;

        obj = drm_gem_object_lookup(dev, file, args->handle);
        if (obj == NULL)
                return -ENOENT;

        if (args->size == 0)
                goto out;

        size = round_page(args->size);
        if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
                error = -ENOMEM;
                goto out;
        }

        /* prime objects have no backing filp to GEM mmap
         * pages from.
         */

        /*
         * Call hint to ensure that NULL is not returned as a valid address
         * and to reduce vm_map traversals. XXX causes instability, use a
         * fixed low address as the start point instead to avoid the NULL
         * return issue.
         */
        addr = PAGE_SIZE;

        /*
         * Use 256KB alignment. It is unclear why this matters for a
         * virtual address but it appears to fix a number of application/X
         * crashes and kms console switching is much faster.
         */
        vm_object_hold(obj->vm_obj);
        vm_object_reference_locked(obj->vm_obj);
        vm_object_drop(obj->vm_obj);

        rv = vm_map_find(map, obj->vm_obj, NULL,
                         args->offset, &addr, args->size,
                         256 * 1024, /* align */
                         TRUE, /* fitit */
                         VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM,
                         VM_PROT_READ | VM_PROT_WRITE, /* prot */
                         VM_PROT_READ | VM_PROT_WRITE, /* max */
                         MAP_SHARED /* cow */);
        if (rv != KERN_SUCCESS) {
                vm_object_deallocate(obj->vm_obj);
                error = -vm_mmap_to_errno(rv);
        } else {
                args->addr_ptr = (uint64_t)addr;
        }
out:
        drm_gem_object_unreference(obj);
        return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page. XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page;
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case. Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility. The Linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * @vma: VMA in question
 * @vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.
So performance may 1796 * suffer if the GTT working set is large or there are few fence registers 1797 * left. 1798 * 1799 * vm_obj is locked on entry and expected to be locked on return. The VM 1800 * pager has placed an anonymous memory page at (obj,offset) which we have 1801 * to replace. 1802 */ 1803 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1804 { 1805 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1806 struct drm_device *dev = obj->base.dev; 1807 struct drm_i915_private *dev_priv = dev->dev_private; 1808 struct i915_ggtt_view view = i915_ggtt_view_normal; 1809 unsigned long page_offset; 1810 vm_page_t m, oldm = NULL; 1811 int ret = 0; 1812 bool write = !!(prot & VM_PROT_WRITE); 1813 1814 intel_runtime_pm_get(dev_priv); 1815 1816 /* We don't use vmf->pgoff since that has the fake offset */ 1817 page_offset = (unsigned long)offset; 1818 1819 retry: 1820 ret = i915_mutex_lock_interruptible(dev); 1821 if (ret) 1822 goto out; 1823 1824 trace_i915_gem_object_fault(obj, page_offset, true, write); 1825 1826 /* Try to flush the object off the GPU first without holding the lock. 1827 * Upon reacquiring the lock, we will perform our sanity checks and then 1828 * repeat the flush holding the lock in the normal manner to catch cases 1829 * where we are gazumped. 1830 */ 1831 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1832 if (ret) 1833 goto unlock; 1834 1835 /* Access to snoopable pages through the GTT is incoherent. */ 1836 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1837 ret = -EFAULT; 1838 goto unlock; 1839 } 1840 1841 /* Use a partial view if the object is bigger than the aperture. */ 1842 if (obj->base.size >= dev_priv->gtt.mappable_end && 1843 obj->tiling_mode == I915_TILING_NONE) { 1844 #if 0 1845 static const unsigned int chunk_size = 256; // 1 MiB 1846 1847 memset(&view, 0, sizeof(view)); 1848 view.type = I915_GGTT_VIEW_PARTIAL; 1849 view.params.partial.offset = rounddown(page_offset, chunk_size); 1850 view.params.partial.size = 1851 min_t(unsigned int, 1852 chunk_size, 1853 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1854 view.params.partial.offset); 1855 #endif 1856 } 1857 1858 /* Now pin it into the GTT if needed */ 1859 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1860 if (ret) 1861 goto unlock; 1862 1863 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1864 if (ret) 1865 goto unpin; 1866 1867 ret = i915_gem_object_get_fence(obj); 1868 if (ret) 1869 goto unpin; 1870 1871 /* 1872 * START FREEBSD MAGIC 1873 * 1874 * Add a pip count to avoid destruction and certain other 1875 * complex operations (such as collapses?) while unlocked. 1876 */ 1877 vm_object_pip_add(vm_obj, 1); 1878 1879 /* 1880 * XXX We must currently remove the placeholder page now to avoid 1881 * a deadlock against a concurrent i915_gem_release_mmap(). 1882 * Otherwise concurrent operation will block on the busy page 1883 * while holding locks which we need to obtain. 1884 */ 1885 if (*mres != NULL) { 1886 oldm = *mres; 1887 if ((oldm->flags & PG_BUSY) == 0) 1888 kprintf("i915_gem_fault: Page was not busy\n"); 1889 else 1890 vm_page_remove(oldm); 1891 *mres = NULL; 1892 } else { 1893 oldm = NULL; 1894 } 1895 1896 ret = 0; 1897 m = NULL; 1898 1899 /* 1900 * Since the object lock was dropped, another thread might have 1901 * faulted on the same GTT address and instantiated the mapping. 1902 * Recheck. 
1903 */ 1904 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1905 if (m != NULL) { 1906 /* 1907 * Try to busy the page, retry on failure (non-zero ret). 1908 */ 1909 if (vm_page_busy_try(m, false)) { 1910 kprintf("i915_gem_fault: PG_BUSY\n"); 1911 ret = -EINTR; 1912 goto unlock; 1913 } 1914 goto have_page; 1915 } 1916 /* 1917 * END FREEBSD MAGIC 1918 */ 1919 1920 obj->fault_mappable = true; 1921 1922 /* Finally, remap it using the new GTT offset */ 1923 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base + 1924 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1925 if (m == NULL) { 1926 ret = -EFAULT; 1927 goto unpin; 1928 } 1929 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1930 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1931 1932 /* 1933 * Try to busy the page. Fails on non-zero return. 1934 */ 1935 if (vm_page_busy_try(m, false)) { 1936 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1937 ret = -EINTR; 1938 goto unpin; 1939 } 1940 m->valid = VM_PAGE_BITS_ALL; 1941 1942 #if 0 1943 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1944 /* Overriding existing pages in partial view does not cause 1945 * us any trouble as TLBs are still valid because the fault 1946 * is due to userspace losing part of the mapping or never 1947 * having accessed it before (at this partials' range). 1948 */ 1949 unsigned long base = vma->vm_start + 1950 (view.params.partial.offset << PAGE_SHIFT); 1951 unsigned int i; 1952 1953 for (i = 0; i < view.params.partial.size; i++) { 1954 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1955 if (ret) 1956 break; 1957 } 1958 1959 obj->fault_mappable = true; 1960 } else { 1961 if (!obj->fault_mappable) { 1962 unsigned long size = min_t(unsigned long, 1963 vma->vm_end - vma->vm_start, 1964 obj->base.size); 1965 int i; 1966 1967 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1968 ret = vm_insert_pfn(vma, 1969 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1970 pfn + i); 1971 if (ret) 1972 break; 1973 } 1974 1975 obj->fault_mappable = true; 1976 } else 1977 ret = vm_insert_pfn(vma, 1978 (unsigned long)vmf->virtual_address, 1979 pfn + page_offset); 1980 #endif 1981 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1982 #if 0 1983 } 1984 #endif 1985 1986 have_page: 1987 *mres = m; 1988 1989 i915_gem_object_ggtt_unpin_view(obj, &view); 1990 mutex_unlock(&dev->struct_mutex); 1991 ret = VM_PAGER_OK; 1992 goto done; 1993 1994 /* 1995 * ALTERNATIVE ERROR RETURN. 1996 * 1997 * OBJECT EXPECTED TO BE LOCKED. 1998 */ 1999 unpin: 2000 i915_gem_object_ggtt_unpin_view(obj, &view); 2001 unlock: 2002 mutex_unlock(&dev->struct_mutex); 2003 out: 2004 switch (ret) { 2005 case -EIO: 2006 /* 2007 * We eat errors when the gpu is terminally wedged to avoid 2008 * userspace unduly crashing (gl has no provisions for mmaps to 2009 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2010 * and so needs to be reported. 2011 */ 2012 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2013 // ret = VM_FAULT_SIGBUS; 2014 break; 2015 } 2016 case -EAGAIN: 2017 /* 2018 * EAGAIN means the gpu is hung and we'll wait for the error 2019 * handler to reset everything when re-faulting in 2020 * i915_mutex_lock_interruptible. 
2021 */ 2022 case -ERESTARTSYS: 2023 case -EINTR: 2024 VM_OBJECT_UNLOCK(vm_obj); 2025 int dummy; 2026 tsleep(&dummy, 0, "delay", 1); /* XXX */ 2027 VM_OBJECT_LOCK(vm_obj); 2028 goto retry; 2029 default: 2030 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2031 ret = VM_PAGER_ERROR; 2032 break; 2033 } 2034 2035 done: 2036 if (oldm != NULL) 2037 vm_page_free(oldm); 2038 vm_object_pip_wakeup(vm_obj); 2039 2040 intel_runtime_pm_put(dev_priv); 2041 return ret; 2042 } 2043 2044 /** 2045 * i915_gem_release_mmap - remove physical page mappings 2046 * @obj: obj in question 2047 * 2048 * Preserve the reservation of the mmapping with the DRM core code, but 2049 * relinquish ownership of the pages back to the system. 2050 * 2051 * It is vital that we remove the page mapping if we have mapped a tiled 2052 * object through the GTT and then lose the fence register due to 2053 * resource pressure. Similarly if the object has been moved out of the 2054 * aperture, than pages mapped into userspace must be revoked. Removing the 2055 * mapping will then trigger a page fault on the next user access, allowing 2056 * fixup by i915_gem_fault(). 2057 */ 2058 void 2059 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2060 { 2061 vm_object_t devobj; 2062 vm_page_t m; 2063 int i, page_count; 2064 2065 if (!obj->fault_mappable) 2066 return; 2067 2068 devobj = cdev_pager_lookup(obj); 2069 if (devobj != NULL) { 2070 page_count = OFF_TO_IDX(obj->base.size); 2071 2072 VM_OBJECT_LOCK(devobj); 2073 for (i = 0; i < page_count; i++) { 2074 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 2075 if (m == NULL) 2076 continue; 2077 cdev_pager_free_page(devobj, m); 2078 } 2079 VM_OBJECT_UNLOCK(devobj); 2080 vm_object_deallocate(devobj); 2081 } 2082 2083 obj->fault_mappable = false; 2084 } 2085 2086 void 2087 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2088 { 2089 struct drm_i915_gem_object *obj; 2090 2091 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2092 i915_gem_release_mmap(obj); 2093 } 2094 2095 uint32_t 2096 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2097 { 2098 uint32_t gtt_size; 2099 2100 if (INTEL_INFO(dev)->gen >= 4 || 2101 tiling_mode == I915_TILING_NONE) 2102 return size; 2103 2104 /* Previous chips need a power-of-two fence region when tiling */ 2105 if (INTEL_INFO(dev)->gen == 3) 2106 gtt_size = 1024*1024; 2107 else 2108 gtt_size = 512*1024; 2109 2110 while (gtt_size < size) 2111 gtt_size <<= 1; 2112 2113 return gtt_size; 2114 } 2115 2116 /** 2117 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2118 * @obj: object to check 2119 * 2120 * Return the required GTT alignment for an object, taking into account 2121 * potential fence register mapping. 2122 */ 2123 uint32_t 2124 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2125 int tiling_mode, bool fenced) 2126 { 2127 /* 2128 * Minimum alignment is 4k (GTT page size), but might be greater 2129 * if a fence register is needed for the object. 2130 */ 2131 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2132 tiling_mode == I915_TILING_NONE) 2133 return 4096; 2134 2135 /* 2136 * Previous chips need to be aligned to the size of the smallest 2137 * fence register that can contain the object. 
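 *
 * A worked example, following i915_gem_get_gtt_size() above: on gen3 the
 * smallest fence region is 1 MiB, so a 1.5 MiB tiled object is rounded up
 * to a 2 MiB fence size and therefore needs 2 MiB alignment, whereas the
 * same object on gen4+ (or untiled) only needs the 4 KiB GTT page size.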
2138 */ 2139 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2140 } 2141 2142 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2143 { 2144 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2145 int ret; 2146 2147 #if 0 2148 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2149 return 0; 2150 #endif 2151 2152 dev_priv->mm.shrinker_no_lock_stealing = true; 2153 2154 ret = drm_gem_create_mmap_offset(&obj->base); 2155 if (ret != -ENOSPC) 2156 goto out; 2157 2158 /* Badly fragmented mmap space? The only way we can recover 2159 * space is by destroying unwanted objects. We can't randomly release 2160 * mmap_offsets as userspace expects them to be persistent for the 2161 * lifetime of the objects. The closest we can is to release the 2162 * offsets on purgeable objects by truncating it and marking it purged, 2163 * which prevents userspace from ever using that object again. 2164 */ 2165 i915_gem_shrink(dev_priv, 2166 obj->base.size >> PAGE_SHIFT, 2167 I915_SHRINK_BOUND | 2168 I915_SHRINK_UNBOUND | 2169 I915_SHRINK_PURGEABLE); 2170 ret = drm_gem_create_mmap_offset(&obj->base); 2171 if (ret != -ENOSPC) 2172 goto out; 2173 2174 i915_gem_shrink_all(dev_priv); 2175 ret = drm_gem_create_mmap_offset(&obj->base); 2176 out: 2177 dev_priv->mm.shrinker_no_lock_stealing = false; 2178 2179 return ret; 2180 } 2181 2182 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2183 { 2184 drm_gem_free_mmap_offset(&obj->base); 2185 } 2186 2187 int 2188 i915_gem_mmap_gtt(struct drm_file *file, 2189 struct drm_device *dev, 2190 uint32_t handle, 2191 uint64_t *offset) 2192 { 2193 struct drm_i915_gem_object *obj; 2194 int ret; 2195 2196 ret = i915_mutex_lock_interruptible(dev); 2197 if (ret) 2198 return ret; 2199 2200 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2201 if (&obj->base == NULL) { 2202 ret = -ENOENT; 2203 goto unlock; 2204 } 2205 2206 if (obj->madv != I915_MADV_WILLNEED) { 2207 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2208 ret = -EFAULT; 2209 goto out; 2210 } 2211 2212 ret = i915_gem_object_create_mmap_offset(obj); 2213 if (ret) 2214 goto out; 2215 2216 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2217 DRM_GEM_MAPPING_KEY; 2218 2219 out: 2220 drm_gem_object_unreference(&obj->base); 2221 unlock: 2222 mutex_unlock(&dev->struct_mutex); 2223 return ret; 2224 } 2225 2226 /** 2227 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2228 * @dev: DRM device 2229 * @data: GTT mapping ioctl data 2230 * @file: GEM object info 2231 * 2232 * Simply returns the fake offset to userspace so it can mmap it. 2233 * The mmap call will end up in drm_gem_mmap(), which will set things 2234 * up so we can get faults in the handler above. 2235 * 2236 * The fault handler will take care of binding the object into the GTT 2237 * (since it may have been evicted to make room for something), allocating 2238 * a fence register, and mapping the appropriate aperture address into 2239 * userspace. 
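 *
 * A rough sketch of the userspace side (illustrative only; "fd" is an
 * assumed open DRM file descriptor and error handling is omitted):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, arg.offset);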
2240 */ 2241 int 2242 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2243 struct drm_file *file) 2244 { 2245 struct drm_i915_gem_mmap_gtt *args = data; 2246 2247 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2248 } 2249 2250 /* Immediately discard the backing storage */ 2251 static void 2252 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2253 { 2254 vm_object_t vm_obj; 2255 2256 vm_obj = obj->base.vm_obj; 2257 VM_OBJECT_LOCK(vm_obj); 2258 vm_object_page_remove(vm_obj, 0, 0, false); 2259 VM_OBJECT_UNLOCK(vm_obj); 2260 2261 obj->madv = __I915_MADV_PURGED; 2262 } 2263 2264 /* Try to discard unwanted pages */ 2265 static void 2266 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2267 { 2268 #if 0 2269 struct address_space *mapping; 2270 #endif 2271 2272 switch (obj->madv) { 2273 case I915_MADV_DONTNEED: 2274 i915_gem_object_truncate(obj); 2275 case __I915_MADV_PURGED: 2276 return; 2277 } 2278 2279 #if 0 2280 if (obj->base.filp == NULL) 2281 return; 2282 2283 mapping = file_inode(obj->base.filp)->i_mapping, 2284 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2285 #endif 2286 } 2287 2288 static void 2289 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2290 { 2291 struct sg_page_iter sg_iter; 2292 int ret; 2293 2294 BUG_ON(obj->madv == __I915_MADV_PURGED); 2295 2296 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2297 if (ret) { 2298 /* In the event of a disaster, abandon all caches and 2299 * hope for the best. 2300 */ 2301 WARN_ON(ret != -EIO); 2302 i915_gem_clflush_object(obj, true); 2303 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2304 } 2305 2306 i915_gem_gtt_finish_object(obj); 2307 2308 if (i915_gem_object_needs_bit17_swizzle(obj)) 2309 i915_gem_object_save_bit_17_swizzle(obj); 2310 2311 if (obj->madv == I915_MADV_DONTNEED) 2312 obj->dirty = 0; 2313 2314 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2315 struct vm_page *page = sg_page_iter_page(&sg_iter); 2316 2317 if (obj->dirty) 2318 set_page_dirty(page); 2319 2320 if (obj->madv == I915_MADV_WILLNEED) 2321 mark_page_accessed(page); 2322 2323 vm_page_busy_wait(page, FALSE, "i915gem"); 2324 vm_page_unwire(page, 1); 2325 vm_page_wakeup(page); 2326 } 2327 obj->dirty = 0; 2328 2329 sg_free_table(obj->pages); 2330 kfree(obj->pages); 2331 } 2332 2333 int 2334 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2335 { 2336 const struct drm_i915_gem_object_ops *ops = obj->ops; 2337 2338 if (obj->pages == NULL) 2339 return 0; 2340 2341 if (obj->pages_pin_count) 2342 return -EBUSY; 2343 2344 BUG_ON(i915_gem_obj_bound_any(obj)); 2345 2346 /* ->put_pages might need to allocate memory for the bit17 swizzle 2347 * array, hence protect them from being reaped by removing them from gtt 2348 * lists early. */ 2349 list_del(&obj->global_list); 2350 2351 ops->put_pages(obj); 2352 obj->pages = NULL; 2353 2354 i915_gem_object_invalidate(obj); 2355 2356 return 0; 2357 } 2358 2359 static int 2360 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2361 { 2362 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2363 int page_count, i; 2364 vm_object_t vm_obj; 2365 struct sg_table *st; 2366 struct scatterlist *sg; 2367 struct sg_page_iter sg_iter; 2368 struct vm_page *page; 2369 unsigned long last_pfn = 0; /* suppress gcc warning */ 2370 int ret; 2371 2372 /* Assert that the object is not currently in any GPU domain. 
As it 2373 * wasn't in the GTT, there shouldn't be any way it could have been in 2374 * a GPU cache 2375 */ 2376 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2377 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2378 2379 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2380 if (st == NULL) 2381 return -ENOMEM; 2382 2383 page_count = obj->base.size / PAGE_SIZE; 2384 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2385 kfree(st); 2386 return -ENOMEM; 2387 } 2388 2389 /* Get the list of pages out of our struct file. They'll be pinned 2390 * at this point until we release them. 2391 * 2392 * Fail silently without starting the shrinker 2393 */ 2394 vm_obj = obj->base.vm_obj; 2395 VM_OBJECT_LOCK(vm_obj); 2396 sg = st->sgl; 2397 st->nents = 0; 2398 for (i = 0; i < page_count; i++) { 2399 page = shmem_read_mapping_page(vm_obj, i); 2400 if (IS_ERR(page)) { 2401 i915_gem_shrink(dev_priv, 2402 page_count, 2403 I915_SHRINK_BOUND | 2404 I915_SHRINK_UNBOUND | 2405 I915_SHRINK_PURGEABLE); 2406 page = shmem_read_mapping_page(vm_obj, i); 2407 } 2408 if (IS_ERR(page)) { 2409 /* We've tried hard to allocate the memory by reaping 2410 * our own buffer, now let the real VM do its job and 2411 * go down in flames if truly OOM. 2412 */ 2413 i915_gem_shrink_all(dev_priv); 2414 page = shmem_read_mapping_page(vm_obj, i); 2415 if (IS_ERR(page)) { 2416 ret = PTR_ERR(page); 2417 goto err_pages; 2418 } 2419 } 2420 #ifdef CONFIG_SWIOTLB 2421 if (swiotlb_nr_tbl()) { 2422 st->nents++; 2423 sg_set_page(sg, page, PAGE_SIZE, 0); 2424 sg = sg_next(sg); 2425 continue; 2426 } 2427 #endif 2428 if (!i || page_to_pfn(page) != last_pfn + 1) { 2429 if (i) 2430 sg = sg_next(sg); 2431 st->nents++; 2432 sg_set_page(sg, page, PAGE_SIZE, 0); 2433 } else { 2434 sg->length += PAGE_SIZE; 2435 } 2436 last_pfn = page_to_pfn(page); 2437 2438 /* Check that the i965g/gm workaround works. */ 2439 } 2440 #ifdef CONFIG_SWIOTLB 2441 if (!swiotlb_nr_tbl()) 2442 #endif 2443 sg_mark_end(sg); 2444 obj->pages = st; 2445 VM_OBJECT_UNLOCK(vm_obj); 2446 2447 ret = i915_gem_gtt_prepare_object(obj); 2448 if (ret) 2449 goto err_pages; 2450 2451 if (i915_gem_object_needs_bit17_swizzle(obj)) 2452 i915_gem_object_do_bit_17_swizzle(obj); 2453 2454 if (obj->tiling_mode != I915_TILING_NONE && 2455 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2456 i915_gem_object_pin_pages(obj); 2457 2458 return 0; 2459 2460 err_pages: 2461 sg_mark_end(sg); 2462 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2463 page = sg_page_iter_page(&sg_iter); 2464 vm_page_busy_wait(page, FALSE, "i915gem"); 2465 vm_page_unwire(page, 0); 2466 vm_page_wakeup(page); 2467 } 2468 VM_OBJECT_UNLOCK(vm_obj); 2469 sg_free_table(st); 2470 kfree(st); 2471 2472 /* shmemfs first checks if there is enough memory to allocate the page 2473 * and reports ENOSPC should there be insufficient, along with the usual 2474 * ENOMEM for a genuine allocation failure. 2475 * 2476 * We use ENOSPC in our driver to mean that we have run out of aperture 2477 * space and so want to translate the error from shmemfs back to our 2478 * usual understanding of ENOMEM. 2479 */ 2480 if (ret == -ENOSPC) 2481 ret = -ENOMEM; 2482 2483 return ret; 2484 } 2485 2486 /* Ensure that the associated pages are gathered from the backing storage 2487 * and pinned into our object. 
i915_gem_object_get_pages() may be called 2488 * multiple times before they are released by a single call to 2489 * i915_gem_object_put_pages() - once the pages are no longer referenced 2490 * either as a result of memory pressure (reaping pages under the shrinker) 2491 * or as the object is itself released. 2492 */ 2493 int 2494 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2495 { 2496 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2497 const struct drm_i915_gem_object_ops *ops = obj->ops; 2498 int ret; 2499 2500 if (obj->pages) 2501 return 0; 2502 2503 if (obj->madv != I915_MADV_WILLNEED) { 2504 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2505 return -EFAULT; 2506 } 2507 2508 BUG_ON(obj->pages_pin_count); 2509 2510 ret = ops->get_pages(obj); 2511 if (ret) 2512 return ret; 2513 2514 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2515 2516 obj->get_page.sg = obj->pages->sgl; 2517 obj->get_page.last = 0; 2518 2519 return 0; 2520 } 2521 2522 void i915_vma_move_to_active(struct i915_vma *vma, 2523 struct drm_i915_gem_request *req) 2524 { 2525 struct drm_i915_gem_object *obj = vma->obj; 2526 struct intel_engine_cs *ring; 2527 2528 ring = i915_gem_request_get_ring(req); 2529 2530 /* Add a reference if we're newly entering the active list. */ 2531 if (obj->active == 0) 2532 drm_gem_object_reference(&obj->base); 2533 obj->active |= intel_ring_flag(ring); 2534 2535 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2536 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2537 2538 list_move_tail(&vma->vm_link, &vma->vm->active_list); 2539 } 2540 2541 static void 2542 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2543 { 2544 RQ_BUG_ON(obj->last_write_req == NULL); 2545 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2546 2547 i915_gem_request_assign(&obj->last_write_req, NULL); 2548 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2549 } 2550 2551 static void 2552 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2553 { 2554 struct i915_vma *vma; 2555 2556 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2557 RQ_BUG_ON(!(obj->active & (1 << ring))); 2558 2559 list_del_init(&obj->ring_list[ring]); 2560 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2561 2562 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2563 i915_gem_object_retire__write(obj); 2564 2565 obj->active &= ~(1 << ring); 2566 if (obj->active) 2567 return; 2568 2569 /* Bump our place on the bound list to keep it roughly in LRU order 2570 * so that we don't steal from recently used but inactive objects 2571 * (unless we are forced to ofc!) 
2572 */ 2573 list_move_tail(&obj->global_list, 2574 &to_i915(obj->base.dev)->mm.bound_list); 2575 2576 list_for_each_entry(vma, &obj->vma_list, obj_link) { 2577 if (!list_empty(&vma->vm_link)) 2578 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 2579 } 2580 2581 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2582 drm_gem_object_unreference(&obj->base); 2583 } 2584 2585 static int 2586 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2587 { 2588 struct drm_i915_private *dev_priv = dev->dev_private; 2589 struct intel_engine_cs *ring; 2590 int ret, i, j; 2591 2592 /* Carefully retire all requests without writing to the rings */ 2593 for_each_ring(ring, dev_priv, i) { 2594 ret = intel_ring_idle(ring); 2595 if (ret) 2596 return ret; 2597 } 2598 i915_gem_retire_requests(dev); 2599 2600 /* Finally reset hw state */ 2601 for_each_ring(ring, dev_priv, i) { 2602 intel_ring_init_seqno(ring, seqno); 2603 2604 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2605 ring->semaphore.sync_seqno[j] = 0; 2606 } 2607 2608 return 0; 2609 } 2610 2611 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2612 { 2613 struct drm_i915_private *dev_priv = dev->dev_private; 2614 int ret; 2615 2616 if (seqno == 0) 2617 return -EINVAL; 2618 2619 /* HWS page needs to be set less than what we 2620 * will inject to ring 2621 */ 2622 ret = i915_gem_init_seqno(dev, seqno - 1); 2623 if (ret) 2624 return ret; 2625 2626 /* Carefully set the last_seqno value so that wrap 2627 * detection still works 2628 */ 2629 dev_priv->next_seqno = seqno; 2630 dev_priv->last_seqno = seqno - 1; 2631 if (dev_priv->last_seqno == 0) 2632 dev_priv->last_seqno--; 2633 2634 return 0; 2635 } 2636 2637 int 2638 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2639 { 2640 struct drm_i915_private *dev_priv = dev->dev_private; 2641 2642 /* reserve 0 for non-seqno */ 2643 if (dev_priv->next_seqno == 0) { 2644 int ret = i915_gem_init_seqno(dev, 0); 2645 if (ret) 2646 return ret; 2647 2648 dev_priv->next_seqno = 1; 2649 } 2650 2651 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2652 return 0; 2653 } 2654 2655 /* 2656 * NB: This function is not allowed to fail. Doing so would mean the the 2657 * request is not being tracked for completion but the work itself is 2658 * going to happen on the hardware. This would be a Bad Thing(tm). 2659 */ 2660 void __i915_add_request(struct drm_i915_gem_request *request, 2661 struct drm_i915_gem_object *obj, 2662 bool flush_caches) 2663 { 2664 struct intel_engine_cs *ring; 2665 struct drm_i915_private *dev_priv; 2666 struct intel_ringbuffer *ringbuf; 2667 u32 request_start; 2668 int ret; 2669 2670 if (WARN_ON(request == NULL)) 2671 return; 2672 2673 ring = request->ring; 2674 dev_priv = ring->dev->dev_private; 2675 ringbuf = request->ringbuf; 2676 2677 /* 2678 * To ensure that this call will not fail, space for its emissions 2679 * should already have been reserved in the ring buffer. Let the ring 2680 * know that it is time to use that space up. 2681 */ 2682 intel_ring_reserved_space_use(ringbuf); 2683 2684 request_start = intel_ring_get_tail(ringbuf); 2685 /* 2686 * Emit any outstanding flushes - execbuf can fail to emit the flush 2687 * after having emitted the batchbuffer command. Hence we need to fix 2688 * things up similar to emitting the lazy request. The difference here 2689 * is that the flush _must_ happen before the next request, no matter 2690 * what. 
2691 */ 2692 if (flush_caches) { 2693 if (i915.enable_execlists) 2694 ret = logical_ring_flush_all_caches(request); 2695 else 2696 ret = intel_ring_flush_all_caches(request); 2697 /* Not allowed to fail! */ 2698 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 2699 } 2700 2701 /* Record the position of the start of the request so that 2702 * should we detect the updated seqno part-way through the 2703 * GPU processing the request, we never over-estimate the 2704 * position of the head. 2705 */ 2706 request->postfix = intel_ring_get_tail(ringbuf); 2707 2708 if (i915.enable_execlists) 2709 ret = ring->emit_request(request); 2710 else { 2711 ret = ring->add_request(request); 2712 2713 request->tail = intel_ring_get_tail(ringbuf); 2714 } 2715 2716 /* Not allowed to fail! */ 2717 WARN(ret, "emit|add_request failed: %d!\n", ret); 2718 2719 request->head = request_start; 2720 2721 /* Whilst this request exists, batch_obj will be on the 2722 * active_list, and so will hold the active reference. Only when this 2723 * request is retired will the the batch_obj be moved onto the 2724 * inactive_list and lose its active reference. Hence we do not need 2725 * to explicitly hold another reference here. 2726 */ 2727 request->batch_obj = obj; 2728 2729 request->emitted_jiffies = jiffies; 2730 request->previous_seqno = ring->last_submitted_seqno; 2731 ring->last_submitted_seqno = request->seqno; 2732 list_add_tail(&request->list, &ring->request_list); 2733 2734 trace_i915_gem_request_add(request); 2735 2736 i915_queue_hangcheck(ring->dev); 2737 2738 queue_delayed_work(dev_priv->wq, 2739 &dev_priv->mm.retire_work, 2740 round_jiffies_up_relative(HZ)); 2741 intel_mark_busy(dev_priv->dev); 2742 2743 /* Sanity check that the reserved size was large enough. */ 2744 intel_ring_reserved_space_end(ringbuf); 2745 } 2746 2747 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2748 const struct intel_context *ctx) 2749 { 2750 unsigned long elapsed; 2751 2752 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2753 2754 if (ctx->hang_stats.banned) 2755 return true; 2756 2757 if (ctx->hang_stats.ban_period_seconds && 2758 elapsed <= ctx->hang_stats.ban_period_seconds) { 2759 if (!i915_gem_context_is_default(ctx)) { 2760 DRM_DEBUG("context hanging too fast, banning!\n"); 2761 return true; 2762 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2763 if (i915_stop_ring_allow_warn(dev_priv)) 2764 DRM_ERROR("gpu hanging too fast, banning!\n"); 2765 return true; 2766 } 2767 } 2768 2769 return false; 2770 } 2771 2772 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2773 struct intel_context *ctx, 2774 const bool guilty) 2775 { 2776 struct i915_ctx_hang_stats *hs; 2777 2778 if (WARN_ON(!ctx)) 2779 return; 2780 2781 hs = &ctx->hang_stats; 2782 2783 if (guilty) { 2784 hs->banned = i915_context_is_banned(dev_priv, ctx); 2785 hs->batch_active++; 2786 hs->guilty_ts = get_seconds(); 2787 } else { 2788 hs->batch_pending++; 2789 } 2790 } 2791 2792 void i915_gem_request_free(struct kref *req_ref) 2793 { 2794 struct drm_i915_gem_request *req = container_of(req_ref, 2795 typeof(*req), ref); 2796 struct intel_context *ctx = req->ctx; 2797 2798 if (req->file_priv) 2799 i915_gem_request_remove_from_client(req); 2800 2801 if (ctx) { 2802 if (i915.enable_execlists && ctx != req->i915->kernel_context) 2803 intel_lr_context_unpin(ctx, req->ring); 2804 2805 i915_gem_context_unreference(ctx); 2806 } 2807 2808 kfree(req); 2809 } 2810 2811 static inline int 2812 __i915_gem_request_alloc(struct 
intel_engine_cs *ring, 2813 struct intel_context *ctx, 2814 struct drm_i915_gem_request **req_out) 2815 { 2816 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2817 struct drm_i915_gem_request *req; 2818 int ret; 2819 2820 if (!req_out) 2821 return -EINVAL; 2822 2823 *req_out = NULL; 2824 2825 req = kzalloc(sizeof(*req), GFP_KERNEL); 2826 if (req == NULL) 2827 return -ENOMEM; 2828 2829 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2830 if (ret) 2831 goto err; 2832 2833 kref_init(&req->ref); 2834 req->i915 = dev_priv; 2835 req->ring = ring; 2836 req->ctx = ctx; 2837 i915_gem_context_reference(req->ctx); 2838 2839 if (i915.enable_execlists) 2840 ret = intel_logical_ring_alloc_request_extras(req); 2841 else 2842 ret = intel_ring_alloc_request_extras(req); 2843 if (ret) { 2844 i915_gem_context_unreference(req->ctx); 2845 goto err; 2846 } 2847 2848 /* 2849 * Reserve space in the ring buffer for all the commands required to 2850 * eventually emit this request. This is to guarantee that the 2851 * i915_add_request() call can't fail. Note that the reserve may need 2852 * to be redone if the request is not actually submitted straight 2853 * away, e.g. because a GPU scheduler has deferred it. 2854 */ 2855 if (i915.enable_execlists) 2856 ret = intel_logical_ring_reserve_space(req); 2857 else 2858 ret = intel_ring_reserve_space(req); 2859 if (ret) { 2860 /* 2861 * At this point, the request is fully allocated even if not 2862 * fully prepared. Thus it can be cleaned up using the proper 2863 * free code. 2864 */ 2865 i915_gem_request_cancel(req); 2866 return ret; 2867 } 2868 2869 *req_out = req; 2870 return 0; 2871 2872 err: 2873 kfree(req); 2874 return ret; 2875 } 2876 2877 /** 2878 * i915_gem_request_alloc - allocate a request structure 2879 * 2880 * @engine: engine that we wish to issue the request on. 2881 * @ctx: context that the request will be associated with. 2882 * This can be NULL if the request is not directly related to 2883 * any specific user context, in which case this function will 2884 * choose an appropriate context to use. 2885 * 2886 * Returns a pointer to the allocated request if successful, 2887 * or an error code if not. 2888 */ 2889 struct drm_i915_gem_request * 2890 i915_gem_request_alloc(struct intel_engine_cs *engine, 2891 struct intel_context *ctx) 2892 { 2893 struct drm_i915_gem_request *req; 2894 int err; 2895 2896 if (ctx == NULL) 2897 ctx = to_i915(engine->dev)->kernel_context; 2898 err = __i915_gem_request_alloc(engine, ctx, &req); 2899 return err ? 
ERR_PTR(err) : req; 2900 } 2901 2902 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 2903 { 2904 intel_ring_reserved_space_cancel(req->ringbuf); 2905 2906 i915_gem_request_unreference(req); 2907 } 2908 2909 struct drm_i915_gem_request * 2910 i915_gem_find_active_request(struct intel_engine_cs *ring) 2911 { 2912 struct drm_i915_gem_request *request; 2913 2914 list_for_each_entry(request, &ring->request_list, list) { 2915 if (i915_gem_request_completed(request, false)) 2916 continue; 2917 2918 return request; 2919 } 2920 2921 return NULL; 2922 } 2923 2924 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2925 struct intel_engine_cs *ring) 2926 { 2927 struct drm_i915_gem_request *request; 2928 bool ring_hung; 2929 2930 request = i915_gem_find_active_request(ring); 2931 2932 if (request == NULL) 2933 return; 2934 2935 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2936 2937 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2938 2939 list_for_each_entry_continue(request, &ring->request_list, list) 2940 i915_set_reset_status(dev_priv, request->ctx, false); 2941 } 2942 2943 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2944 struct intel_engine_cs *ring) 2945 { 2946 struct intel_ringbuffer *buffer; 2947 2948 while (!list_empty(&ring->active_list)) { 2949 struct drm_i915_gem_object *obj; 2950 2951 obj = list_first_entry(&ring->active_list, 2952 struct drm_i915_gem_object, 2953 ring_list[ring->id]); 2954 2955 i915_gem_object_retire__read(obj, ring->id); 2956 } 2957 2958 /* 2959 * Clear the execlists queue up before freeing the requests, as those 2960 * are the ones that keep the context and ringbuffer backing objects 2961 * pinned in place. 2962 */ 2963 2964 if (i915.enable_execlists) { 2965 spin_lock_irq(&ring->execlist_lock); 2966 2967 /* list_splice_tail_init checks for empty lists */ 2968 list_splice_tail_init(&ring->execlist_queue, 2969 &ring->execlist_retired_req_list); 2970 2971 spin_unlock_irq(&ring->execlist_lock); 2972 intel_execlists_retire_requests(ring); 2973 } 2974 2975 /* 2976 * We must free the requests after all the corresponding objects have 2977 * been moved off active lists. Which is the same order as the normal 2978 * retire_requests function does. This is important if object hold 2979 * implicit references on things like e.g. ppgtt address spaces through 2980 * the request. 2981 */ 2982 while (!list_empty(&ring->request_list)) { 2983 struct drm_i915_gem_request *request; 2984 2985 request = list_first_entry(&ring->request_list, 2986 struct drm_i915_gem_request, 2987 list); 2988 2989 i915_gem_request_retire(request); 2990 } 2991 2992 /* Having flushed all requests from all queues, we know that all 2993 * ringbuffers must now be empty. However, since we do not reclaim 2994 * all space when retiring the request (to prevent HEADs colliding 2995 * with rapid ringbuffer wraparound) the amount of available space 2996 * upon reset is less than when we start. Do one more pass over 2997 * all the ringbuffers to reset last_retired_head. 2998 */ 2999 list_for_each_entry(buffer, &ring->buffers, link) { 3000 buffer->last_retired_head = buffer->tail; 3001 intel_ring_update_space(buffer); 3002 } 3003 } 3004 3005 void i915_gem_reset(struct drm_device *dev) 3006 { 3007 struct drm_i915_private *dev_priv = dev->dev_private; 3008 struct intel_engine_cs *ring; 3009 int i; 3010 3011 /* 3012 * Before we free the objects from the requests, we need to inspect 3013 * them for finding the guilty party. 
As the requests only borrow 3014 * their reference to the objects, the inspection must be done first. 3015 */ 3016 for_each_ring(ring, dev_priv, i) 3017 i915_gem_reset_ring_status(dev_priv, ring); 3018 3019 for_each_ring(ring, dev_priv, i) 3020 i915_gem_reset_ring_cleanup(dev_priv, ring); 3021 3022 i915_gem_context_reset(dev); 3023 3024 i915_gem_restore_fences(dev); 3025 3026 WARN_ON(i915_verify_lists(dev)); 3027 } 3028 3029 /** 3030 * This function clears the request list as sequence numbers are passed. 3031 */ 3032 void 3033 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 3034 { 3035 WARN_ON(i915_verify_lists(ring->dev)); 3036 3037 /* Retire requests first as we use it above for the early return. 3038 * If we retire requests last, we may use a later seqno and so clear 3039 * the requests lists without clearing the active list, leading to 3040 * confusion. 3041 */ 3042 while (!list_empty(&ring->request_list)) { 3043 struct drm_i915_gem_request *request; 3044 3045 request = list_first_entry(&ring->request_list, 3046 struct drm_i915_gem_request, 3047 list); 3048 3049 if (!i915_gem_request_completed(request, true)) 3050 break; 3051 3052 i915_gem_request_retire(request); 3053 } 3054 3055 /* Move any buffers on the active list that are no longer referenced 3056 * by the ringbuffer to the flushing/inactive lists as appropriate, 3057 * before we free the context associated with the requests. 3058 */ 3059 while (!list_empty(&ring->active_list)) { 3060 struct drm_i915_gem_object *obj; 3061 3062 obj = list_first_entry(&ring->active_list, 3063 struct drm_i915_gem_object, 3064 ring_list[ring->id]); 3065 3066 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3067 break; 3068 3069 i915_gem_object_retire__read(obj, ring->id); 3070 } 3071 3072 if (unlikely(ring->trace_irq_req && 3073 i915_gem_request_completed(ring->trace_irq_req, true))) { 3074 ring->irq_put(ring); 3075 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3076 } 3077 3078 WARN_ON(i915_verify_lists(ring->dev)); 3079 } 3080 3081 bool 3082 i915_gem_retire_requests(struct drm_device *dev) 3083 { 3084 struct drm_i915_private *dev_priv = dev->dev_private; 3085 struct intel_engine_cs *ring; 3086 bool idle = true; 3087 int i; 3088 3089 for_each_ring(ring, dev_priv, i) { 3090 i915_gem_retire_requests_ring(ring); 3091 idle &= list_empty(&ring->request_list); 3092 if (i915.enable_execlists) { 3093 spin_lock_irq(&ring->execlist_lock); 3094 idle &= list_empty(&ring->execlist_queue); 3095 spin_unlock_irq(&ring->execlist_lock); 3096 3097 intel_execlists_retire_requests(ring); 3098 } 3099 } 3100 3101 if (idle) 3102 mod_delayed_work(dev_priv->wq, 3103 &dev_priv->mm.idle_work, 3104 msecs_to_jiffies(100)); 3105 3106 return idle; 3107 } 3108 3109 static void 3110 i915_gem_retire_work_handler(struct work_struct *work) 3111 { 3112 struct drm_i915_private *dev_priv = 3113 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3114 struct drm_device *dev = dev_priv->dev; 3115 bool idle; 3116 3117 /* Come back later if the device is busy... 
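 * (or the struct_mutex is contended): rather than blocking here we simply
 * re-arm the delayed work with the same period and try again.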
*/ 3118 idle = false; 3119 if (mutex_trylock(&dev->struct_mutex)) { 3120 idle = i915_gem_retire_requests(dev); 3121 mutex_unlock(&dev->struct_mutex); 3122 } 3123 if (!idle) 3124 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3125 round_jiffies_up_relative(HZ)); 3126 } 3127 3128 static void 3129 i915_gem_idle_work_handler(struct work_struct *work) 3130 { 3131 struct drm_i915_private *dev_priv = 3132 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3133 struct drm_device *dev = dev_priv->dev; 3134 struct intel_engine_cs *ring; 3135 int i; 3136 3137 for_each_ring(ring, dev_priv, i) 3138 if (!list_empty(&ring->request_list)) 3139 return; 3140 3141 /* we probably should sync with hangcheck here, using cancel_work_sync. 3142 * Also locking seems to be fubar here, ring->request_list is protected 3143 * by dev->struct_mutex. */ 3144 3145 intel_mark_idle(dev); 3146 3147 if (mutex_trylock(&dev->struct_mutex)) { 3148 struct intel_engine_cs *ring; 3149 int i; 3150 3151 for_each_ring(ring, dev_priv, i) 3152 i915_gem_batch_pool_fini(&ring->batch_pool); 3153 3154 mutex_unlock(&dev->struct_mutex); 3155 } 3156 } 3157 3158 /** 3159 * Ensures that an object will eventually get non-busy by flushing any required 3160 * write domains, emitting any outstanding lazy request and retiring and 3161 * completed requests. 3162 */ 3163 static int 3164 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3165 { 3166 int i; 3167 3168 if (!obj->active) 3169 return 0; 3170 3171 for (i = 0; i < I915_NUM_RINGS; i++) { 3172 struct drm_i915_gem_request *req; 3173 3174 req = obj->last_read_req[i]; 3175 if (req == NULL) 3176 continue; 3177 3178 if (list_empty(&req->list)) 3179 goto retire; 3180 3181 if (i915_gem_request_completed(req, true)) { 3182 __i915_gem_request_retire__upto(req); 3183 retire: 3184 i915_gem_object_retire__read(obj, i); 3185 } 3186 } 3187 3188 return 0; 3189 } 3190 3191 /** 3192 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3193 * @DRM_IOCTL_ARGS: standard ioctl arguments 3194 * 3195 * Returns 0 if successful, else an error is returned with the remaining time in 3196 * the timeout parameter. 3197 * -ETIME: object is still busy after timeout 3198 * -ERESTARTSYS: signal interrupted the wait 3199 * -ENONENT: object doesn't exist 3200 * Also possible, but rare: 3201 * -EAGAIN: GPU wedged 3202 * -ENOMEM: damn 3203 * -ENODEV: Internal IRQ fail 3204 * -E?: The add request failed 3205 * 3206 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3207 * non-zero timeout parameter the wait ioctl will wait for the given number of 3208 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3209 * without holding struct_mutex the object may become re-busied before this 3210 * function completes. 
A similar but shorter * race condition exists in the busy 3211 * ioctl 3212 */ 3213 int 3214 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3215 { 3216 struct drm_i915_private *dev_priv = dev->dev_private; 3217 struct drm_i915_gem_wait *args = data; 3218 struct drm_i915_gem_object *obj; 3219 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3220 unsigned reset_counter; 3221 int i, n = 0; 3222 int ret; 3223 3224 if (args->flags != 0) 3225 return -EINVAL; 3226 3227 ret = i915_mutex_lock_interruptible(dev); 3228 if (ret) 3229 return ret; 3230 3231 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3232 if (&obj->base == NULL) { 3233 mutex_unlock(&dev->struct_mutex); 3234 return -ENOENT; 3235 } 3236 3237 /* Need to make sure the object gets inactive eventually. */ 3238 ret = i915_gem_object_flush_active(obj); 3239 if (ret) 3240 goto out; 3241 3242 if (!obj->active) 3243 goto out; 3244 3245 /* Do this after OLR check to make sure we make forward progress polling 3246 * on this IOCTL with a timeout == 0 (like busy ioctl) 3247 */ 3248 if (args->timeout_ns == 0) { 3249 ret = -ETIME; 3250 goto out; 3251 } 3252 3253 drm_gem_object_unreference(&obj->base); 3254 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3255 3256 for (i = 0; i < I915_NUM_RINGS; i++) { 3257 if (obj->last_read_req[i] == NULL) 3258 continue; 3259 3260 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3261 } 3262 3263 mutex_unlock(&dev->struct_mutex); 3264 3265 for (i = 0; i < n; i++) { 3266 if (ret == 0) 3267 ret = __i915_wait_request(req[i], reset_counter, true, 3268 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3269 to_rps_client(file)); 3270 i915_gem_request_unreference__unlocked(req[i]); 3271 } 3272 return ret; 3273 3274 out: 3275 drm_gem_object_unreference(&obj->base); 3276 mutex_unlock(&dev->struct_mutex); 3277 return ret; 3278 } 3279 3280 static int 3281 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3282 struct intel_engine_cs *to, 3283 struct drm_i915_gem_request *from_req, 3284 struct drm_i915_gem_request **to_req) 3285 { 3286 struct intel_engine_cs *from; 3287 int ret; 3288 3289 from = i915_gem_request_get_ring(from_req); 3290 if (to == from) 3291 return 0; 3292 3293 if (i915_gem_request_completed(from_req, true)) 3294 return 0; 3295 3296 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3297 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3298 ret = __i915_wait_request(from_req, 3299 atomic_read(&i915->gpu_error.reset_counter), 3300 i915->mm.interruptible, 3301 NULL, 3302 &i915->rps.semaphores); 3303 if (ret) 3304 return ret; 3305 3306 i915_gem_object_retire_request(obj, from_req); 3307 } else { 3308 int idx = intel_ring_sync_index(from, to); 3309 u32 seqno = i915_gem_request_get_seqno(from_req); 3310 3311 WARN_ON(!to_req); 3312 3313 if (seqno <= from->semaphore.sync_seqno[idx]) 3314 return 0; 3315 3316 if (*to_req == NULL) { 3317 struct drm_i915_gem_request *req; 3318 3319 req = i915_gem_request_alloc(to, NULL); 3320 if (IS_ERR(req)) 3321 return PTR_ERR(req); 3322 3323 *to_req = req; 3324 } 3325 3326 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3327 ret = to->semaphore.sync_to(*to_req, from, seqno); 3328 if (ret) 3329 return ret; 3330 3331 /* We use last_read_req because sync_to() 3332 * might have just caused seqno wrap under 3333 * the radar. 
3334 */ 3335 from->semaphore.sync_seqno[idx] = 3336 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3337 } 3338 3339 return 0; 3340 } 3341 3342 /** 3343 * i915_gem_object_sync - sync an object to a ring. 3344 * 3345 * @obj: object which may be in use on another ring. 3346 * @to: ring we wish to use the object on. May be NULL. 3347 * @to_req: request we wish to use the object for. See below. 3348 * This will be allocated and returned if a request is 3349 * required but not passed in. 3350 * 3351 * This code is meant to abstract object synchronization with the GPU. 3352 * Calling with NULL implies synchronizing the object with the CPU 3353 * rather than a particular GPU ring. Conceptually we serialise writes 3354 * between engines inside the GPU. We only allow one engine to write 3355 * into a buffer at any time, but multiple readers. To ensure each has 3356 * a coherent view of memory, we must: 3357 * 3358 * - If there is an outstanding write request to the object, the new 3359 * request must wait for it to complete (either CPU or in hw, requests 3360 * on the same ring will be naturally ordered). 3361 * 3362 * - If we are a write request (pending_write_domain is set), the new 3363 * request must wait for outstanding read requests to complete. 3364 * 3365 * For CPU synchronisation (NULL to) no request is required. For syncing with 3366 * rings to_req must be non-NULL. However, a request does not have to be 3367 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3368 * request will be allocated automatically and returned through *to_req. Note 3369 * that it is not guaranteed that commands will be emitted (because the system 3370 * might already be idle). Hence there is no need to create a request that 3371 * might never have any work submitted. Note further that if a request is 3372 * returned in *to_req, it is the responsibility of the caller to submit 3373 * that request (after potentially adding more work to it). 3374 * 3375 * Returns 0 if successful, else propagates up the lower layer error. 
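 *
 * A rough calling sketch (illustrative only; the local names are
 * assumptions, not taken from any particular caller):
 *
 *	struct drm_i915_gem_request *to_req = NULL;
 *
 *	ret = i915_gem_object_sync(obj, ring, &to_req);
 *	if (ret)
 *		return ret;
 *	if (to_req)
 *		i915_add_request_no_flush(to_req);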
3376 */ 3377 int 3378 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3379 struct intel_engine_cs *to, 3380 struct drm_i915_gem_request **to_req) 3381 { 3382 const bool readonly = obj->base.pending_write_domain == 0; 3383 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3384 int ret, i, n; 3385 3386 if (!obj->active) 3387 return 0; 3388 3389 if (to == NULL) 3390 return i915_gem_object_wait_rendering(obj, readonly); 3391 3392 n = 0; 3393 if (readonly) { 3394 if (obj->last_write_req) 3395 req[n++] = obj->last_write_req; 3396 } else { 3397 for (i = 0; i < I915_NUM_RINGS; i++) 3398 if (obj->last_read_req[i]) 3399 req[n++] = obj->last_read_req[i]; 3400 } 3401 for (i = 0; i < n; i++) { 3402 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3403 if (ret) 3404 return ret; 3405 } 3406 3407 return 0; 3408 } 3409 3410 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3411 { 3412 u32 old_write_domain, old_read_domains; 3413 3414 /* Force a pagefault for domain tracking on next user access */ 3415 i915_gem_release_mmap(obj); 3416 3417 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3418 return; 3419 3420 /* Wait for any direct GTT access to complete */ 3421 mb(); 3422 3423 old_read_domains = obj->base.read_domains; 3424 old_write_domain = obj->base.write_domain; 3425 3426 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3427 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3428 3429 trace_i915_gem_object_change_domain(obj, 3430 old_read_domains, 3431 old_write_domain); 3432 } 3433 3434 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3435 { 3436 struct drm_i915_gem_object *obj = vma->obj; 3437 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3438 int ret; 3439 3440 if (list_empty(&vma->obj_link)) 3441 return 0; 3442 3443 if (!drm_mm_node_allocated(&vma->node)) { 3444 i915_gem_vma_destroy(vma); 3445 return 0; 3446 } 3447 3448 if (vma->pin_count) 3449 return -EBUSY; 3450 3451 BUG_ON(obj->pages == NULL); 3452 3453 if (wait) { 3454 ret = i915_gem_object_wait_rendering(obj, false); 3455 if (ret) 3456 return ret; 3457 } 3458 3459 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3460 i915_gem_object_finish_gtt(obj); 3461 3462 /* release the fence reg _after_ flushing */ 3463 ret = i915_gem_object_put_fence(obj); 3464 if (ret) 3465 return ret; 3466 } 3467 3468 trace_i915_vma_unbind(vma); 3469 3470 vma->vm->unbind_vma(vma); 3471 vma->bound = 0; 3472 3473 list_del_init(&vma->vm_link); 3474 if (vma->is_ggtt) { 3475 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3476 obj->map_and_fenceable = false; 3477 } else if (vma->ggtt_view.pages) { 3478 sg_free_table(vma->ggtt_view.pages); 3479 kfree(vma->ggtt_view.pages); 3480 } 3481 vma->ggtt_view.pages = NULL; 3482 } 3483 3484 drm_mm_remove_node(&vma->node); 3485 i915_gem_vma_destroy(vma); 3486 3487 /* Since the unbound list is global, only move to that list if 3488 * no more VMAs exist. */ 3489 if (list_empty(&obj->vma_list)) 3490 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3491 3492 /* And finally now the object is completely decoupled from this vma, 3493 * we can drop its hold on the backing storage and allow it to be 3494 * reaped by the shrinker. 
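 * This drops the pin taken by i915_gem_object_pin_pages() in
 * i915_gem_object_bind_to_vm() when the vma was first bound.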
3495 */ 3496 i915_gem_object_unpin_pages(obj); 3497 3498 return 0; 3499 } 3500 3501 int i915_vma_unbind(struct i915_vma *vma) 3502 { 3503 return __i915_vma_unbind(vma, true); 3504 } 3505 3506 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3507 { 3508 return __i915_vma_unbind(vma, false); 3509 } 3510 3511 int i915_gpu_idle(struct drm_device *dev) 3512 { 3513 struct drm_i915_private *dev_priv = dev->dev_private; 3514 struct intel_engine_cs *ring; 3515 int ret, i; 3516 3517 /* Flush everything onto the inactive list. */ 3518 for_each_ring(ring, dev_priv, i) { 3519 if (!i915.enable_execlists) { 3520 struct drm_i915_gem_request *req; 3521 3522 req = i915_gem_request_alloc(ring, NULL); 3523 if (IS_ERR(req)) 3524 return PTR_ERR(req); 3525 3526 ret = i915_switch_context(req); 3527 if (ret) { 3528 i915_gem_request_cancel(req); 3529 return ret; 3530 } 3531 3532 i915_add_request_no_flush(req); 3533 } 3534 3535 ret = intel_ring_idle(ring); 3536 if (ret) 3537 return ret; 3538 } 3539 3540 WARN_ON(i915_verify_lists(dev)); 3541 return 0; 3542 } 3543 3544 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3545 unsigned long cache_level) 3546 { 3547 struct drm_mm_node *gtt_space = &vma->node; 3548 struct drm_mm_node *other; 3549 3550 /* 3551 * On some machines we have to be careful when putting differing types 3552 * of snoopable memory together to avoid the prefetcher crossing memory 3553 * domains and dying. During vm initialisation, we decide whether or not 3554 * these constraints apply and set the drm_mm.color_adjust 3555 * appropriately. 3556 */ 3557 if (vma->vm->mm.color_adjust == NULL) 3558 return true; 3559 3560 if (!drm_mm_node_allocated(gtt_space)) 3561 return true; 3562 3563 if (list_empty(&gtt_space->node_list)) 3564 return true; 3565 3566 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3567 if (other->allocated && !other->hole_follows && other->color != cache_level) 3568 return false; 3569 3570 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3571 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3572 return false; 3573 3574 return true; 3575 } 3576 3577 /** 3578 * Finds free space in the GTT aperture and binds the object or a view of it 3579 * there. 3580 */ 3581 static struct i915_vma * 3582 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3583 struct i915_address_space *vm, 3584 const struct i915_ggtt_view *ggtt_view, 3585 unsigned alignment, 3586 uint64_t flags) 3587 { 3588 struct drm_device *dev = obj->base.dev; 3589 struct drm_i915_private *dev_priv = dev->dev_private; 3590 u32 fence_alignment, unfenced_alignment; 3591 u32 search_flag, alloc_flag; 3592 u64 start, end; 3593 u64 size, fence_size; 3594 struct i915_vma *vma; 3595 int ret; 3596 3597 if (i915_is_ggtt(vm)) { 3598 u32 view_size; 3599 3600 if (WARN_ON(!ggtt_view)) 3601 return ERR_PTR(-EINVAL); 3602 3603 view_size = i915_ggtt_view_size(obj, ggtt_view); 3604 3605 fence_size = i915_gem_get_gtt_size(dev, 3606 view_size, 3607 obj->tiling_mode); 3608 fence_alignment = i915_gem_get_gtt_alignment(dev, 3609 view_size, 3610 obj->tiling_mode, 3611 true); 3612 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3613 view_size, 3614 obj->tiling_mode, 3615 false); 3616 size = flags & PIN_MAPPABLE ?
fence_size : view_size; 3617 } else { 3618 fence_size = i915_gem_get_gtt_size(dev, 3619 obj->base.size, 3620 obj->tiling_mode); 3621 fence_alignment = i915_gem_get_gtt_alignment(dev, 3622 obj->base.size, 3623 obj->tiling_mode, 3624 true); 3625 unfenced_alignment = 3626 i915_gem_get_gtt_alignment(dev, 3627 obj->base.size, 3628 obj->tiling_mode, 3629 false); 3630 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3631 } 3632 3633 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3634 end = vm->total; 3635 if (flags & PIN_MAPPABLE) 3636 end = min_t(u64, end, dev_priv->gtt.mappable_end); 3637 if (flags & PIN_ZONE_4G) 3638 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3639 3640 if (alignment == 0) 3641 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3642 unfenced_alignment; 3643 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3644 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3645 ggtt_view ? ggtt_view->type : 0, 3646 alignment); 3647 return ERR_PTR(-EINVAL); 3648 } 3649 3650 /* If binding the object/GGTT view requires more space than the entire 3651 * aperture has, reject it early before evicting everything in a vain 3652 * attempt to find space. 3653 */ 3654 if (size > end) { 3655 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%lu > %s aperture=%lu\n", 3656 ggtt_view ? ggtt_view->type : 0, 3657 size, 3658 flags & PIN_MAPPABLE ? "mappable" : "total", 3659 end); 3660 return ERR_PTR(-E2BIG); 3661 } 3662 3663 ret = i915_gem_object_get_pages(obj); 3664 if (ret) 3665 return ERR_PTR(ret); 3666 3667 i915_gem_object_pin_pages(obj); 3668 3669 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3670 i915_gem_obj_lookup_or_create_vma(obj, vm); 3671 3672 if (IS_ERR(vma)) 3673 goto err_unpin; 3674 3675 if (flags & PIN_OFFSET_FIXED) { 3676 uint64_t offset = flags & PIN_OFFSET_MASK; 3677 3678 if (offset & (alignment - 1) || offset + size > end) { 3679 ret = -EINVAL; 3680 goto err_free_vma; 3681 } 3682 vma->node.start = offset; 3683 vma->node.size = size; 3684 vma->node.color = obj->cache_level; 3685 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3686 if (ret) { 3687 ret = i915_gem_evict_for_vma(vma); 3688 if (ret == 0) 3689 ret = drm_mm_reserve_node(&vm->mm, &vma->node); 3690 } 3691 if (ret) 3692 goto err_free_vma; 3693 } else { 3694 if (flags & PIN_HIGH) { 3695 search_flag = DRM_MM_SEARCH_BELOW; 3696 alloc_flag = DRM_MM_CREATE_TOP; 3697 } else { 3698 search_flag = DRM_MM_SEARCH_DEFAULT; 3699 alloc_flag = DRM_MM_CREATE_DEFAULT; 3700 } 3701 3702 search_free: 3703 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3704 size, alignment, 3705 obj->cache_level, 3706 start, end, 3707 search_flag, 3708 alloc_flag); 3709 if (ret) { 3710 ret = i915_gem_evict_something(dev, vm, size, alignment, 3711 obj->cache_level, 3712 start, end, 3713 flags); 3714 if (ret == 0) 3715 goto search_free; 3716 3717 goto err_free_vma; 3718 } 3719 } 3720 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3721 ret = -EINVAL; 3722 goto err_remove_node; 3723 } 3724 3725 trace_i915_vma_bind(vma, flags); 3726 ret = i915_vma_bind(vma, obj->cache_level, flags); 3727 if (ret) 3728 goto err_remove_node; 3729 3730 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3731 list_add_tail(&vma->vm_link, &vm->inactive_list); 3732 3733 return vma; 3734 3735 err_remove_node: 3736 drm_mm_remove_node(&vma->node); 3737 err_free_vma: 3738 i915_gem_vma_destroy(vma); 3739 vma = ERR_PTR(ret); 3740 
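	/*
	 * The error labels above unwind in reverse order of the setup: the
	 * drm_mm node is removed, the vma is destroyed, and finally (below)
	 * the page pin taken before the vma lookup is dropped again.
	 */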
err_unpin: 3741 i915_gem_object_unpin_pages(obj); 3742 return vma; 3743 } 3744 3745 bool 3746 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3747 bool force) 3748 { 3749 /* If we don't have a page list set up, then we're not pinned 3750 * to GPU, and we can ignore the cache flush because it'll happen 3751 * again at bind time. 3752 */ 3753 if (obj->pages == NULL) 3754 return false; 3755 3756 /* 3757 * Stolen memory is always coherent with the GPU as it is explicitly 3758 * marked as wc by the system, or the system is cache-coherent. 3759 */ 3760 if (obj->stolen || obj->phys_handle) 3761 return false; 3762 3763 /* If the GPU is snooping the contents of the CPU cache, 3764 * we do not need to manually clear the CPU cache lines. However, 3765 * the caches are only snooped when the render cache is 3766 * flushed/invalidated. As we always have to emit invalidations 3767 * and flushes when moving into and out of the RENDER domain, correct 3768 * snooping behaviour occurs naturally as the result of our domain 3769 * tracking. 3770 */ 3771 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3772 obj->cache_dirty = true; 3773 return false; 3774 } 3775 3776 trace_i915_gem_object_clflush(obj); 3777 drm_clflush_sg(obj->pages); 3778 obj->cache_dirty = false; 3779 3780 return true; 3781 } 3782 3783 /** Flushes the GTT write domain for the object if it's dirty. */ 3784 static void 3785 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3786 { 3787 uint32_t old_write_domain; 3788 3789 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3790 return; 3791 3792 /* No actual flushing is required for the GTT write domain. Writes 3793 * to it immediately go to main memory as far as we know, so there's 3794 * no chipset flush. It also doesn't land in render cache. 3795 * 3796 * However, we do have to enforce the order so that all writes through 3797 * the GTT land before any writes to the device, such as updates to 3798 * the GATT itself. 3799 */ 3800 wmb(); 3801 3802 old_write_domain = obj->base.write_domain; 3803 obj->base.write_domain = 0; 3804 3805 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3806 3807 trace_i915_gem_object_change_domain(obj, 3808 obj->base.read_domains, 3809 old_write_domain); 3810 } 3811 3812 /** Flushes the CPU write domain for the object if it's dirty. */ 3813 static void 3814 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3815 { 3816 uint32_t old_write_domain; 3817 3818 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3819 return; 3820 3821 if (i915_gem_clflush_object(obj, obj->pin_display)) 3822 i915_gem_chipset_flush(obj->base.dev); 3823 3824 old_write_domain = obj->base.write_domain; 3825 obj->base.write_domain = 0; 3826 3827 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3828 3829 trace_i915_gem_object_change_domain(obj, 3830 obj->base.read_domains, 3831 old_write_domain); 3832 } 3833 3834 /** 3835 * Moves a single object to the GTT read, and possibly write domain. 3836 * 3837 * This function returns when the move is complete, including waiting on 3838 * flushes to occur. 
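 *
 * The caller must hold struct_mutex. The GTT fault handler above, for
 * example, calls this with write set for write faults immediately after
 * pinning the object into the mappable aperture.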
3839 */ 3840 int 3841 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3842 { 3843 uint32_t old_write_domain, old_read_domains; 3844 struct i915_vma *vma; 3845 int ret; 3846 3847 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3848 return 0; 3849 3850 ret = i915_gem_object_wait_rendering(obj, !write); 3851 if (ret) 3852 return ret; 3853 3854 /* Flush and acquire obj->pages so that we are coherent through 3855 * direct access in memory with previous cached writes through 3856 * shmemfs and that our cache domain tracking remains valid. 3857 * For example, if the obj->filp was moved to swap without us 3858 * being notified and releasing the pages, we would mistakenly 3859 * continue to assume that the obj remained out of the CPU cached 3860 * domain. 3861 */ 3862 ret = i915_gem_object_get_pages(obj); 3863 if (ret) 3864 return ret; 3865 3866 i915_gem_object_flush_cpu_write_domain(obj); 3867 3868 /* Serialise direct access to this object with the barriers for 3869 * coherent writes from the GPU, by effectively invalidating the 3870 * GTT domain upon first access. 3871 */ 3872 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3873 mb(); 3874 3875 old_write_domain = obj->base.write_domain; 3876 old_read_domains = obj->base.read_domains; 3877 3878 /* It should now be out of any other write domains, and we can update 3879 * the domain values for our changes. 3880 */ 3881 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3882 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3883 if (write) { 3884 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3885 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3886 obj->dirty = 1; 3887 } 3888 3889 trace_i915_gem_object_change_domain(obj, 3890 old_read_domains, 3891 old_write_domain); 3892 3893 /* And bump the LRU for this access */ 3894 vma = i915_gem_obj_to_ggtt(obj); 3895 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 3896 list_move_tail(&vma->vm_link, 3897 &to_i915(obj->base.dev)->gtt.base.inactive_list); 3898 3899 return 0; 3900 } 3901 3902 /** 3903 * Changes the cache-level of an object across all VMA. 3904 * 3905 * After this function returns, the object will be in the new cache-level 3906 * across all GTT and the contents of the backing storage will be coherent, 3907 * with respect to the new cache-level. In order to keep the backing storage 3908 * coherent for all users, we only allow a single cache level to be set 3909 * globally on the object and prevent it from being changed whilst the 3910 * hardware is reading from the object. That is if the object is currently 3911 * on the scanout it will be set to uncached (or equivalent display 3912 * cache coherency) and all non-MOCS GPU access will also be uncached so 3913 * that all direct access to the scanout remains coherent. 3914 */ 3915 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3916 enum i915_cache_level cache_level) 3917 { 3918 struct drm_device *dev = obj->base.dev; 3919 struct i915_vma *vma, *next; 3920 bool bound = false; 3921 int ret = 0; 3922 3923 if (obj->cache_level == cache_level) 3924 goto out; 3925 3926 /* Inspect the list of currently bound VMA and unbind any that would 3927 * be invalid given the new cache-level. This is principally to 3928 * catch the issue of the CS prefetch crossing page boundaries and 3929 * reading an invalid PTE on older architectures. 
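 * Only address spaces that installed a color_adjust callback during vm
 * initialisation are affected; see i915_gem_valid_gtt_space() above.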
3930 */ 3931 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 3932 if (!drm_mm_node_allocated(&vma->node)) 3933 continue; 3934 3935 if (vma->pin_count) { 3936 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3937 return -EBUSY; 3938 } 3939 3940 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 3941 ret = i915_vma_unbind(vma); 3942 if (ret) 3943 return ret; 3944 } else 3945 bound = true; 3946 } 3947 3948 /* We can reuse the existing drm_mm nodes but need to change the 3949 * cache-level on the PTE. We could simply unbind them all and 3950 * rebind with the correct cache-level on next use. However since 3951 * we already have a valid slot, dma mapping, pages etc, we may as well 3952 * rewrite the PTE in the belief that doing so tramples upon less 3953 * state and so involves less work. 3954 */ 3955 if (bound) { 3956 /* Before we change the PTE, the GPU must not be accessing it. 3957 * If we wait upon the object, we know that all the bound 3958 * VMA are no longer active. 3959 */ 3960 ret = i915_gem_object_wait_rendering(obj, false); 3961 if (ret) 3962 return ret; 3963 3964 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 3965 /* Access to snoopable pages through the GTT is 3966 * incoherent and on some machines causes a hard 3967 * lockup. Relinquish the CPU mmapping to force 3968 * userspace to refault in the pages and we can 3969 * then double check if the GTT mapping is still 3970 * valid for that pointer access. 3971 */ 3972 i915_gem_release_mmap(obj); 3973 3974 /* As we no longer need a fence for GTT access, 3975 * we can relinquish it now (and so prevent having 3976 * to steal a fence from someone else on the next 3977 * fence request). Note GPU activity would have 3978 * dropped the fence as all snoopable access is 3979 * supposed to be linear. 3980 */ 3981 ret = i915_gem_object_put_fence(obj); 3982 if (ret) 3983 return ret; 3984 } else { 3985 /* We either have incoherent backing store and 3986 * so no GTT access or the architecture is fully 3987 * coherent. In such cases, existing GTT mmaps 3988 * ignore the cache bit in the PTE and we can 3989 * rewrite it without confusing the GPU or having 3990 * to force userspace to fault back in its mmaps. 3991 */ 3992 } 3993 3994 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3995 if (!drm_mm_node_allocated(&vma->node)) 3996 continue; 3997 3998 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3999 if (ret) 4000 return ret; 4001 } 4002 } 4003 4004 list_for_each_entry(vma, &obj->vma_list, obj_link) 4005 vma->node.color = cache_level; 4006 obj->cache_level = cache_level; 4007 4008 out: 4009 /* Flush the dirty CPU caches to the backing storage so that the 4010 * object is now coherent at its new cache level (with respect 4011 * to the access domain).
4012 */ 4013 if (obj->cache_dirty && 4014 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4015 cpu_write_needs_clflush(obj)) { 4016 if (i915_gem_clflush_object(obj, true)) 4017 i915_gem_chipset_flush(obj->base.dev); 4018 } 4019 4020 return 0; 4021 } 4022 4023 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4024 struct drm_file *file) 4025 { 4026 struct drm_i915_gem_caching *args = data; 4027 struct drm_i915_gem_object *obj; 4028 4029 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4030 if (&obj->base == NULL) 4031 return -ENOENT; 4032 4033 switch (obj->cache_level) { 4034 case I915_CACHE_LLC: 4035 case I915_CACHE_L3_LLC: 4036 args->caching = I915_CACHING_CACHED; 4037 break; 4038 4039 case I915_CACHE_WT: 4040 args->caching = I915_CACHING_DISPLAY; 4041 break; 4042 4043 default: 4044 args->caching = I915_CACHING_NONE; 4045 break; 4046 } 4047 4048 drm_gem_object_unreference_unlocked(&obj->base); 4049 return 0; 4050 } 4051 4052 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4053 struct drm_file *file) 4054 { 4055 struct drm_i915_private *dev_priv = dev->dev_private; 4056 struct drm_i915_gem_caching *args = data; 4057 struct drm_i915_gem_object *obj; 4058 enum i915_cache_level level; 4059 int ret; 4060 4061 switch (args->caching) { 4062 case I915_CACHING_NONE: 4063 level = I915_CACHE_NONE; 4064 break; 4065 case I915_CACHING_CACHED: 4066 /* 4067 * Due to a HW issue on BXT A stepping, GPU stores via a 4068 * snooped mapping may leave stale data in a corresponding CPU 4069 * cacheline, whereas normally such cachelines would get 4070 * invalidated. 4071 */ 4072 if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) 4073 return -ENODEV; 4074 4075 level = I915_CACHE_LLC; 4076 break; 4077 case I915_CACHING_DISPLAY: 4078 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4079 break; 4080 default: 4081 return -EINVAL; 4082 } 4083 4084 intel_runtime_pm_get(dev_priv); 4085 4086 ret = i915_mutex_lock_interruptible(dev); 4087 if (ret) 4088 goto rpm_put; 4089 4090 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4091 if (&obj->base == NULL) { 4092 ret = -ENOENT; 4093 goto unlock; 4094 } 4095 4096 ret = i915_gem_object_set_cache_level(obj, level); 4097 4098 drm_gem_object_unreference(&obj->base); 4099 unlock: 4100 mutex_unlock(&dev->struct_mutex); 4101 rpm_put: 4102 intel_runtime_pm_put(dev_priv); 4103 4104 return ret; 4105 } 4106 4107 /* 4108 * Prepare buffer for display plane (scanout, cursors, etc). 4109 * Can be called from an uninterruptible phase (modesetting) and allows 4110 * any flushes to be pipelined (for pageflips). 4111 */ 4112 int 4113 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4114 u32 alignment, 4115 const struct i915_ggtt_view *view) 4116 { 4117 u32 old_read_domains, old_write_domain; 4118 int ret; 4119 4120 /* Mark the pin_display early so that we account for the 4121 * display coherency whilst setting up the cache domains. 4122 */ 4123 obj->pin_display++; 4124 4125 /* The display engine is not coherent with the LLC cache on gen6. As 4126 * a result, we make sure that the pinning that is about to occur is 4127 * done with uncached PTEs. This is lowest common denominator for all 4128 * chipsets. 4129 * 4130 * However for gen6+, we could do better by using the GFDT bit instead 4131 * of uncaching, which would allow us to flush all the LLC-cached data 4132 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 
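	 *
	 * (Illustrative note, not additional driver behaviour: callers such
	 * as the modeset code are expected to pair this with
	 * i915_gem_object_unpin_from_display_plane() once the buffer leaves
	 * the display, e.g.
	 *
	 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
	 *	...
	 *	i915_gem_object_unpin_from_display_plane(obj, &view);
	 *
	 * so that pin_display stays balanced.)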
4133 */ 4134 ret = i915_gem_object_set_cache_level(obj, 4135 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4136 if (ret) 4137 goto err_unpin_display; 4138 4139 /* As the user may map the buffer once pinned in the display plane 4140 * (e.g. libkms for the bootup splash), we have to ensure that we 4141 * always use map_and_fenceable for all scanout buffers. 4142 */ 4143 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4144 view->type == I915_GGTT_VIEW_NORMAL ? 4145 PIN_MAPPABLE : 0); 4146 if (ret) 4147 goto err_unpin_display; 4148 4149 i915_gem_object_flush_cpu_write_domain(obj); 4150 4151 old_write_domain = obj->base.write_domain; 4152 old_read_domains = obj->base.read_domains; 4153 4154 /* It should now be out of any other write domains, and we can update 4155 * the domain values for our changes. 4156 */ 4157 obj->base.write_domain = 0; 4158 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4159 4160 trace_i915_gem_object_change_domain(obj, 4161 old_read_domains, 4162 old_write_domain); 4163 4164 return 0; 4165 4166 err_unpin_display: 4167 obj->pin_display--; 4168 return ret; 4169 } 4170 4171 void 4172 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4173 const struct i915_ggtt_view *view) 4174 { 4175 if (WARN_ON(obj->pin_display == 0)) 4176 return; 4177 4178 i915_gem_object_ggtt_unpin_view(obj, view); 4179 4180 obj->pin_display--; 4181 } 4182 4183 /** 4184 * Moves a single object to the CPU read, and possibly write domain. 4185 * 4186 * This function returns when the move is complete, including waiting on 4187 * flushes to occur. 4188 */ 4189 int 4190 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4191 { 4192 uint32_t old_write_domain, old_read_domains; 4193 int ret; 4194 4195 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4196 return 0; 4197 4198 ret = i915_gem_object_wait_rendering(obj, !write); 4199 if (ret) 4200 return ret; 4201 4202 i915_gem_object_flush_gtt_write_domain(obj); 4203 4204 old_write_domain = obj->base.write_domain; 4205 old_read_domains = obj->base.read_domains; 4206 4207 /* Flush the CPU cache if it's still invalid. */ 4208 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4209 i915_gem_clflush_object(obj, false); 4210 4211 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4212 } 4213 4214 /* It should now be out of any other write domains, and we can update 4215 * the domain values for our changes. 4216 */ 4217 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4218 4219 /* If we're writing through the CPU, then the GPU read domains will 4220 * need to be invalidated at next use. 4221 */ 4222 if (write) { 4223 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4224 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4225 } 4226 4227 trace_i915_gem_object_change_domain(obj, 4228 old_read_domains, 4229 old_write_domain); 4230 4231 return 0; 4232 } 4233 4234 /* Throttle our rendering by waiting until the ring has completed our requests 4235 * emitted over 20 msec ago. 4236 * 4237 * Note that if we were to use the current jiffies each time around the loop, 4238 * we wouldn't escape the function with any frames outstanding if the time to 4239 * render a frame was over 20ms. 4240 * 4241 * This should get us reasonable parallelism between CPU and GPU but also 4242 * relatively low latency when blocking on a particular request to finish. 
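 *
 * (Worked illustration, not additional behaviour: DRM_I915_THROTTLE_JIFFIES
 * is the jiffy form of that 20 msec window, so with
 *
 *	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
 *
 * a request still counts as "recent" while
 * time_after_eq(request->emitted_jiffies, recent_enough) holds, and we end
 * up blocking on the newest request that falls outside that window.)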
4243 */ 4244 static int 4245 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4246 { 4247 struct drm_i915_private *dev_priv = dev->dev_private; 4248 struct drm_i915_file_private *file_priv = file->driver_priv; 4249 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4250 struct drm_i915_gem_request *request, *target = NULL; 4251 unsigned reset_counter; 4252 int ret; 4253 4254 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4255 if (ret) 4256 return ret; 4257 4258 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4259 if (ret) 4260 return ret; 4261 4262 spin_lock(&file_priv->mm.lock); 4263 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4264 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4265 break; 4266 4267 /* 4268 * Note that the request might not have been submitted yet. 4269 * In which case emitted_jiffies will be zero. 4270 */ 4271 if (!request->emitted_jiffies) 4272 continue; 4273 4274 target = request; 4275 } 4276 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4277 if (target) 4278 i915_gem_request_reference(target); 4279 spin_unlock(&file_priv->mm.lock); 4280 4281 if (target == NULL) 4282 return 0; 4283 4284 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4285 if (ret == 0) 4286 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4287 4288 i915_gem_request_unreference__unlocked(target); 4289 4290 return ret; 4291 } 4292 4293 static bool 4294 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4295 { 4296 struct drm_i915_gem_object *obj = vma->obj; 4297 4298 if (alignment && 4299 vma->node.start & (alignment - 1)) 4300 return true; 4301 4302 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4303 return true; 4304 4305 if (flags & PIN_OFFSET_BIAS && 4306 vma->node.start < (flags & PIN_OFFSET_MASK)) 4307 return true; 4308 4309 if (flags & PIN_OFFSET_FIXED && 4310 vma->node.start != (flags & PIN_OFFSET_MASK)) 4311 return true; 4312 4313 return false; 4314 } 4315 4316 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4317 { 4318 struct drm_i915_gem_object *obj = vma->obj; 4319 bool mappable, fenceable; 4320 u32 fence_size, fence_alignment; 4321 4322 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4323 obj->base.size, 4324 obj->tiling_mode); 4325 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4326 obj->base.size, 4327 obj->tiling_mode, 4328 true); 4329 4330 fenceable = (vma->node.size == fence_size && 4331 (vma->node.start & (fence_alignment - 1)) == 0); 4332 4333 mappable = (vma->node.start + fence_size <= 4334 to_i915(obj->base.dev)->gtt.mappable_end); 4335 4336 obj->map_and_fenceable = mappable && fenceable; 4337 } 4338 4339 static int 4340 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4341 struct i915_address_space *vm, 4342 const struct i915_ggtt_view *ggtt_view, 4343 uint32_t alignment, 4344 uint64_t flags) 4345 { 4346 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4347 struct i915_vma *vma; 4348 unsigned bound; 4349 int ret; 4350 4351 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4352 return -ENODEV; 4353 4354 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4355 return -EINVAL; 4356 4357 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4358 return -EINVAL; 4359 4360 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4361 return -EINVAL; 4362 4363 vma = ggtt_view ? 
i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4364 i915_gem_obj_to_vma(obj, vm); 4365 4366 if (IS_ERR(vma)) 4367 return PTR_ERR(vma); 4368 4369 if (vma) { 4370 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4371 return -EBUSY; 4372 4373 if (i915_vma_misplaced(vma, alignment, flags)) { 4374 WARN(vma->pin_count, 4375 "bo is already pinned in %s with incorrect alignment:" 4376 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4377 " obj->map_and_fenceable=%d\n", 4378 ggtt_view ? "ggtt" : "ppgtt", 4379 upper_32_bits(vma->node.start), 4380 lower_32_bits(vma->node.start), 4381 alignment, 4382 !!(flags & PIN_MAPPABLE), 4383 obj->map_and_fenceable); 4384 ret = i915_vma_unbind(vma); 4385 if (ret) 4386 return ret; 4387 4388 vma = NULL; 4389 } 4390 } 4391 4392 bound = vma ? vma->bound : 0; 4393 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4394 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4395 flags); 4396 if (IS_ERR(vma)) 4397 return PTR_ERR(vma); 4398 } else { 4399 ret = i915_vma_bind(vma, obj->cache_level, flags); 4400 if (ret) 4401 return ret; 4402 } 4403 4404 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4405 (bound ^ vma->bound) & GLOBAL_BIND) { 4406 __i915_vma_set_map_and_fenceable(vma); 4407 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4408 } 4409 4410 vma->pin_count++; 4411 return 0; 4412 } 4413 4414 int 4415 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4416 struct i915_address_space *vm, 4417 uint32_t alignment, 4418 uint64_t flags) 4419 { 4420 return i915_gem_object_do_pin(obj, vm, 4421 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4422 alignment, flags); 4423 } 4424 4425 int 4426 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4427 const struct i915_ggtt_view *view, 4428 uint32_t alignment, 4429 uint64_t flags) 4430 { 4431 if (WARN_ONCE(!view, "no view specified")) 4432 return -EINVAL; 4433 4434 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4435 alignment, flags | PIN_GLOBAL); 4436 } 4437 4438 void 4439 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4440 const struct i915_ggtt_view *view) 4441 { 4442 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4443 4444 BUG_ON(!vma); 4445 WARN_ON(vma->pin_count == 0); 4446 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4447 4448 --vma->pin_count; 4449 } 4450 4451 int 4452 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4453 struct drm_file *file) 4454 { 4455 struct drm_i915_gem_busy *args = data; 4456 struct drm_i915_gem_object *obj; 4457 int ret; 4458 4459 ret = i915_mutex_lock_interruptible(dev); 4460 if (ret) 4461 return ret; 4462 4463 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4464 if (&obj->base == NULL) { 4465 ret = -ENOENT; 4466 goto unlock; 4467 } 4468 4469 /* Count all active objects as busy, even if they are currently not used 4470 * by the gpu. Users of this interface expect objects to eventually 4471 * become non-busy without any further actions, therefore emit any 4472 * necessary flushes here. 
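	 *
	 * (Worked example of the encoding built below, for illustration only;
	 * the engine names are placeholders. The low 16 bits of args->busy
	 * carry the exec_id of the engine that last wrote the object, and bit
	 * (16 + exec_id) is set for every engine with an outstanding read.
	 * An object last written by engine W and still being read by engine R
	 * would therefore report at least
	 *
	 *	args->busy = W_exec_id | (1 << (16 + R_exec_id));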
4473 */ 4474 ret = i915_gem_object_flush_active(obj); 4475 if (ret) 4476 goto unref; 4477 4478 args->busy = 0; 4479 if (obj->active) { 4480 int i; 4481 4482 for (i = 0; i < I915_NUM_RINGS; i++) { 4483 struct drm_i915_gem_request *req; 4484 4485 req = obj->last_read_req[i]; 4486 if (req) 4487 args->busy |= 1 << (16 + req->ring->exec_id); 4488 } 4489 if (obj->last_write_req) 4490 args->busy |= obj->last_write_req->ring->exec_id; 4491 } 4492 4493 unref: 4494 drm_gem_object_unreference(&obj->base); 4495 unlock: 4496 mutex_unlock(&dev->struct_mutex); 4497 return ret; 4498 } 4499 4500 int 4501 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4502 struct drm_file *file_priv) 4503 { 4504 return i915_gem_ring_throttle(dev, file_priv); 4505 } 4506 4507 int 4508 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4509 struct drm_file *file_priv) 4510 { 4511 struct drm_i915_private *dev_priv = dev->dev_private; 4512 struct drm_i915_gem_madvise *args = data; 4513 struct drm_i915_gem_object *obj; 4514 int ret; 4515 4516 switch (args->madv) { 4517 case I915_MADV_DONTNEED: 4518 case I915_MADV_WILLNEED: 4519 break; 4520 default: 4521 return -EINVAL; 4522 } 4523 4524 ret = i915_mutex_lock_interruptible(dev); 4525 if (ret) 4526 return ret; 4527 4528 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4529 if (&obj->base == NULL) { 4530 ret = -ENOENT; 4531 goto unlock; 4532 } 4533 4534 if (i915_gem_obj_is_pinned(obj)) { 4535 ret = -EINVAL; 4536 goto out; 4537 } 4538 4539 if (obj->pages && 4540 obj->tiling_mode != I915_TILING_NONE && 4541 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4542 if (obj->madv == I915_MADV_WILLNEED) 4543 i915_gem_object_unpin_pages(obj); 4544 if (args->madv == I915_MADV_WILLNEED) 4545 i915_gem_object_pin_pages(obj); 4546 } 4547 4548 if (obj->madv != __I915_MADV_PURGED) 4549 obj->madv = args->madv; 4550 4551 /* if the object is no longer attached, discard its backing storage */ 4552 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4553 i915_gem_object_truncate(obj); 4554 4555 args->retained = obj->madv != __I915_MADV_PURGED; 4556 4557 out: 4558 drm_gem_object_unreference(&obj->base); 4559 unlock: 4560 mutex_unlock(&dev->struct_mutex); 4561 return ret; 4562 } 4563 4564 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4565 const struct drm_i915_gem_object_ops *ops) 4566 { 4567 int i; 4568 4569 INIT_LIST_HEAD(&obj->global_list); 4570 for (i = 0; i < I915_NUM_RINGS; i++) 4571 INIT_LIST_HEAD(&obj->ring_list[i]); 4572 INIT_LIST_HEAD(&obj->obj_exec_link); 4573 INIT_LIST_HEAD(&obj->vma_list); 4574 INIT_LIST_HEAD(&obj->batch_pool_link); 4575 4576 obj->ops = ops; 4577 4578 obj->fence_reg = I915_FENCE_REG_NONE; 4579 obj->madv = I915_MADV_WILLNEED; 4580 4581 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4582 } 4583 4584 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4585 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4586 .get_pages = i915_gem_object_get_pages_gtt, 4587 .put_pages = i915_gem_object_put_pages_gtt, 4588 }; 4589 4590 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4591 size_t size) 4592 { 4593 struct drm_i915_gem_object *obj; 4594 #if 0 4595 struct address_space *mapping; 4596 gfp_t mask; 4597 #endif 4598 4599 obj = i915_gem_object_alloc(dev); 4600 if (obj == NULL) 4601 return NULL; 4602 4603 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4604 i915_gem_object_free(obj); 4605 return NULL; 4606 } 4607 4608 #if 0 4609 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 
4610 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4611 /* 965gm cannot relocate objects above 4GiB. */ 4612 mask &= ~__GFP_HIGHMEM; 4613 mask |= __GFP_DMA32; 4614 } 4615 4616 mapping = file_inode(obj->base.filp)->i_mapping; 4617 mapping_set_gfp_mask(mapping, mask); 4618 #endif 4619 4620 i915_gem_object_init(obj, &i915_gem_object_ops); 4621 4622 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4623 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4624 4625 if (HAS_LLC(dev)) { 4626 /* On some devices, we can have the GPU use the LLC (the CPU 4627 * cache) for about a 10% performance improvement 4628 * compared to uncached. Graphics requests other than 4629 * display scanout are coherent with the CPU in 4630 * accessing this cache. This means in this mode we 4631 * don't need to clflush on the CPU side, and on the 4632 * GPU side we only need to flush internal caches to 4633 * get data visible to the CPU. 4634 * 4635 * However, we maintain the display planes as UC, and so 4636 * need to rebind when first used as such. 4637 */ 4638 obj->cache_level = I915_CACHE_LLC; 4639 } else 4640 obj->cache_level = I915_CACHE_NONE; 4641 4642 trace_i915_gem_object_create(obj); 4643 4644 return obj; 4645 } 4646 4647 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4648 { 4649 /* If we are the last user of the backing storage (be it shmemfs 4650 * pages or stolen etc), we know that the pages are going to be 4651 * immediately released. In this case, we can then skip copying 4652 * back the contents from the GPU. 4653 */ 4654 4655 if (obj->madv != I915_MADV_WILLNEED) 4656 return false; 4657 4658 if (obj->base.vm_obj == NULL) 4659 return true; 4660 4661 /* At first glance, this looks racy, but then again so would be 4662 * userspace racing mmap against close. However, the first external 4663 * reference to the filp can only be obtained through the 4664 * i915_gem_mmap_ioctl() which safeguards us against the user 4665 * acquiring such a reference whilst we are in the middle of 4666 * freeing the object. 4667 */ 4668 #if 0 4669 return atomic_long_read(&obj->base.filp->f_count) == 1; 4670 #else 4671 return false; 4672 #endif 4673 } 4674 4675 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4676 { 4677 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4678 struct drm_device *dev = obj->base.dev; 4679 struct drm_i915_private *dev_priv = dev->dev_private; 4680 struct i915_vma *vma, *next; 4681 4682 intel_runtime_pm_get(dev_priv); 4683 4684 trace_i915_gem_object_destroy(obj); 4685 4686 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { 4687 int ret; 4688 4689 vma->pin_count = 0; 4690 ret = i915_vma_unbind(vma); 4691 if (WARN_ON(ret == -ERESTARTSYS)) { 4692 bool was_interruptible; 4693 4694 was_interruptible = dev_priv->mm.interruptible; 4695 dev_priv->mm.interruptible = false; 4696 4697 WARN_ON(i915_vma_unbind(vma)); 4698 4699 dev_priv->mm.interruptible = was_interruptible; 4700 } 4701 } 4702 4703 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4704 * before progressing. 
	 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(obj->frontbuffer_bits);

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    obj->tiling_mode != I915_TILING_NONE)
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);

	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
		    vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	if (WARN_ONCE(!view, "no view specified"))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(vma, &obj->vma_list, obj_link)
		if (vma->vm == ggtt &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}

void i915_gem_vma_destroy(struct i915_vma *vma)
{
	WARN_ON(vma->node.allocated);

	/* Keep the vma as a placeholder in the execbuffer reservation lists */
	if (!list_empty(&vma->exec_list))
		return;

	if (!vma->is_ggtt)
		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));

	list_del(&vma->obj_link);

	kfree(vma);
}

static void
i915_gem_stop_ringbuffers(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.stop_ring(ring);
}

int
i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev);

	i915_gem_stop_ringbuffers(dev);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
#if 0
	flush_delayed_work(&dev_priv->mm.idle_work);
#endif

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
4820 */ 4821 WARN_ON(dev_priv->mm.busy); 4822 4823 return 0; 4824 4825 err: 4826 mutex_unlock(&dev->struct_mutex); 4827 return ret; 4828 } 4829 4830 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 4831 { 4832 struct intel_engine_cs *ring = req->ring; 4833 struct drm_device *dev = ring->dev; 4834 struct drm_i915_private *dev_priv = dev->dev_private; 4835 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4836 int i, ret; 4837 4838 if (!HAS_L3_DPF(dev) || !remap_info) 4839 return 0; 4840 4841 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 4842 if (ret) 4843 return ret; 4844 4845 /* 4846 * Note: We do not worry about the concurrent register cacheline hang 4847 * here because no other code should access these registers other than 4848 * at initialization time. 4849 */ 4850 for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { 4851 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4852 intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); 4853 intel_ring_emit(ring, remap_info[i]); 4854 } 4855 4856 intel_ring_advance(ring); 4857 4858 return ret; 4859 } 4860 4861 void i915_gem_init_swizzling(struct drm_device *dev) 4862 { 4863 struct drm_i915_private *dev_priv = dev->dev_private; 4864 4865 if (INTEL_INFO(dev)->gen < 5 || 4866 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4867 return; 4868 4869 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4870 DISP_TILE_SURFACE_SWIZZLING); 4871 4872 if (IS_GEN5(dev)) 4873 return; 4874 4875 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4876 if (IS_GEN6(dev)) 4877 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4878 else if (IS_GEN7(dev)) 4879 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4880 else if (IS_GEN8(dev)) 4881 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4882 else 4883 BUG(); 4884 } 4885 4886 static void init_unused_ring(struct drm_device *dev, u32 base) 4887 { 4888 struct drm_i915_private *dev_priv = dev->dev_private; 4889 4890 I915_WRITE(RING_CTL(base), 0); 4891 I915_WRITE(RING_HEAD(base), 0); 4892 I915_WRITE(RING_TAIL(base), 0); 4893 I915_WRITE(RING_START(base), 0); 4894 } 4895 4896 static void init_unused_rings(struct drm_device *dev) 4897 { 4898 if (IS_I830(dev)) { 4899 init_unused_ring(dev, PRB1_BASE); 4900 init_unused_ring(dev, SRB0_BASE); 4901 init_unused_ring(dev, SRB1_BASE); 4902 init_unused_ring(dev, SRB2_BASE); 4903 init_unused_ring(dev, SRB3_BASE); 4904 } else if (IS_GEN2(dev)) { 4905 init_unused_ring(dev, SRB0_BASE); 4906 init_unused_ring(dev, SRB1_BASE); 4907 } else if (IS_GEN3(dev)) { 4908 init_unused_ring(dev, PRB1_BASE); 4909 init_unused_ring(dev, PRB2_BASE); 4910 } 4911 } 4912 4913 int i915_gem_init_rings(struct drm_device *dev) 4914 { 4915 struct drm_i915_private *dev_priv = dev->dev_private; 4916 int ret; 4917 4918 ret = intel_init_render_ring_buffer(dev); 4919 if (ret) 4920 return ret; 4921 4922 if (HAS_BSD(dev)) { 4923 ret = intel_init_bsd_ring_buffer(dev); 4924 if (ret) 4925 goto cleanup_render_ring; 4926 } 4927 4928 if (HAS_BLT(dev)) { 4929 ret = intel_init_blt_ring_buffer(dev); 4930 if (ret) 4931 goto cleanup_bsd_ring; 4932 } 4933 4934 if (HAS_VEBOX(dev)) { 4935 ret = intel_init_vebox_ring_buffer(dev); 4936 if (ret) 4937 goto cleanup_blt_ring; 4938 } 4939 4940 if (HAS_BSD2(dev)) { 4941 ret = intel_init_bsd2_ring_buffer(dev); 4942 if (ret) 4943 goto cleanup_vebox_ring; 4944 } 4945 4946 return 0; 4947 4948 cleanup_vebox_ring: 4949 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4950 cleanup_blt_ring: 4951 
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);

	return ret;
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int ret, i, j;

#if 0
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
		return -EIO;
#endif

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (dev_priv->ellc_size)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume, which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(dev);

	BUG_ON(!dev_priv->kernel_context);

	ret = i915_ppgtt_init_hw(dev);
	if (ret) {
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
		goto out;
	}

	/* Need to do basic initialisation of all rings first: */
	for_each_ring(ring, dev_priv, i) {
		ret = ring->init_hw(ring);
		if (ret)
			goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	if (HAS_GUC_UCODE(dev)) {
		ret = intel_guc_ucode_load(dev);
		if (ret) {
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
			ret = -EIO;
			goto out;
		}
	}

	/*
	 * Increment the next seqno by 0x100 so we have a visible break
	 * on re-initialisation
	 */
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
	if (ret)
		goto out;

	/* Now it is safe to go back round and do everything else: */
	for_each_ring(ring, dev_priv, i) {
		struct drm_i915_gem_request *req;

		req = i915_gem_request_alloc(ring, NULL);
		if (IS_ERR(req)) {
			ret = PTR_ERR(req);
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		if (ring->id == RCS) {
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
				i915_gem_l3_remap(req, j);
		}

		ret = i915_ppgtt_init_ring(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		ret = i915_gem_context_enable(req);
		if (ret && ret != -EIO) {
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
			i915_gem_request_cancel(req);
			i915_gem_cleanup_ringbuffer(dev);
			goto out;
		}

		i915_add_request_no_flush(req);
	}

out:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	return ret;
}

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
			i915.enable_execlists);

	mutex_lock(&dev->struct_mutex);

	if (!i915.enable_execlists) {
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
		dev_priv->gt.init_rings = i915_gem_init_rings;
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
	} else {
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
		dev_priv->gt.init_rings = intel_logical_rings_init;
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
	}

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation, these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_userptr(dev);
	if (ret)
		goto out_unlock;

	i915_gem_init_global_gtt(dev);

	ret = i915_gem_context_init(dev);
	if (ret)
		goto out_unlock;

	ret = dev_priv->gt.init_rings(dev);
	if (ret)
		goto out_unlock;

	ret = i915_gem_init_hw(dev);
	if (ret == -EIO) {
		/* Allow ring initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry;
		 * for any other failure, such as an allocation failure, bail.
		 */
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
		ret = 0;
	}

out_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		dev_priv->gt.cleanup_ring(ring);

	if (i915.enable_execlists)
		/*
		 * Neither the BIOS, ourselves, nor any other kernel
		 * expects the system to be in execlists mode on startup,
		 * so we need to reset the GPU back to legacy mode.
5154 */ 5155 intel_gpu_reset(dev); 5156 } 5157 5158 static void 5159 init_ring_lists(struct intel_engine_cs *ring) 5160 { 5161 INIT_LIST_HEAD(&ring->active_list); 5162 INIT_LIST_HEAD(&ring->request_list); 5163 } 5164 5165 void 5166 i915_gem_load_init(struct drm_device *dev) 5167 { 5168 struct drm_i915_private *dev_priv = dev->dev_private; 5169 int i; 5170 5171 INIT_LIST_HEAD(&dev_priv->vm_list); 5172 INIT_LIST_HEAD(&dev_priv->context_list); 5173 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5174 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5175 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5176 for (i = 0; i < I915_NUM_RINGS; i++) 5177 init_ring_lists(&dev_priv->ring[i]); 5178 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5179 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5180 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5181 i915_gem_retire_work_handler); 5182 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5183 i915_gem_idle_work_handler); 5184 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5185 5186 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5187 5188 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) 5189 dev_priv->num_fence_regs = 32; 5190 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5191 dev_priv->num_fence_regs = 16; 5192 else 5193 dev_priv->num_fence_regs = 8; 5194 5195 if (intel_vgpu_active(dev)) 5196 dev_priv->num_fence_regs = 5197 I915_READ(vgtif_reg(avail_rs.fence_num)); 5198 5199 /* 5200 * Set initial sequence number for requests. 5201 * Using this number allows the wraparound to happen early, 5202 * catching any obvious problems. 5203 */ 5204 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5205 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5206 5207 /* Initialize fence registers to zero */ 5208 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5209 i915_gem_restore_fences(dev); 5210 5211 i915_gem_detect_bit_6_swizzle(dev); 5212 init_waitqueue_head(&dev_priv->pending_flip_queue); 5213 5214 dev_priv->mm.interruptible = true; 5215 5216 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5217 } 5218 5219 void i915_gem_load_cleanup(struct drm_device *dev) 5220 { 5221 #if 0 5222 struct drm_i915_private *dev_priv = to_i915(dev); 5223 5224 kmem_cache_destroy(dev_priv->requests); 5225 kmem_cache_destroy(dev_priv->vmas); 5226 kmem_cache_destroy(dev_priv->objects); 5227 #endif 5228 } 5229 5230 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5231 { 5232 struct drm_i915_file_private *file_priv = file->driver_priv; 5233 5234 /* Clean up our request list when the client is going away, so that 5235 * later retire_requests won't dereference our soon-to-be-gone 5236 * file_priv. 
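	 *
	 * (Descriptive note only: the requests themselves remain on the
	 * engine lists and are retired as normal; below we merely sever the
	 * client link and clear request->file_priv so that later retirement
	 * cannot chase a stale pointer.)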
5237 */ 5238 spin_lock(&file_priv->mm.lock); 5239 while (!list_empty(&file_priv->mm.request_list)) { 5240 struct drm_i915_gem_request *request; 5241 5242 request = list_first_entry(&file_priv->mm.request_list, 5243 struct drm_i915_gem_request, 5244 client_list); 5245 list_del(&request->client_list); 5246 request->file_priv = NULL; 5247 } 5248 spin_unlock(&file_priv->mm.lock); 5249 5250 if (!list_empty(&file_priv->rps.link)) { 5251 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5252 list_del(&file_priv->rps.link); 5253 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5254 } 5255 } 5256 5257 int 5258 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5259 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5260 { 5261 *color = 0; /* XXXKIB */ 5262 return (0); 5263 } 5264 5265 void 5266 i915_gem_pager_dtor(void *handle) 5267 { 5268 struct drm_gem_object *obj; 5269 struct drm_device *dev; 5270 5271 obj = handle; 5272 dev = obj->dev; 5273 5274 mutex_lock(&dev->struct_mutex); 5275 drm_gem_free_mmap_offset(obj); 5276 i915_gem_release_mmap(to_intel_bo(obj)); 5277 drm_gem_object_unreference(obj); 5278 mutex_unlock(&dev->struct_mutex); 5279 } 5280 5281 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5282 { 5283 struct drm_i915_file_private *file_priv; 5284 int ret; 5285 5286 DRM_DEBUG_DRIVER("\n"); 5287 5288 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5289 if (!file_priv) 5290 return -ENOMEM; 5291 5292 file->driver_priv = file_priv; 5293 file_priv->dev_priv = dev->dev_private; 5294 file_priv->file = file; 5295 INIT_LIST_HEAD(&file_priv->rps.link); 5296 5297 spin_init(&file_priv->mm.lock, "i915_priv"); 5298 INIT_LIST_HEAD(&file_priv->mm.request_list); 5299 5300 file_priv->bsd_ring = -1; 5301 5302 ret = i915_gem_context_open(dev, file); 5303 if (ret) 5304 kfree(file_priv); 5305 5306 return ret; 5307 } 5308 5309 /** 5310 * i915_gem_track_fb - update frontbuffer tracking 5311 * @old: current GEM buffer for the frontbuffer slots 5312 * @new: new GEM buffer for the frontbuffer slots 5313 * @frontbuffer_bits: bitmask of frontbuffer slots 5314 * 5315 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5316 * from @old and setting them in @new. Both @old and @new can be NULL. 5317 */ 5318 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5319 struct drm_i915_gem_object *new, 5320 unsigned frontbuffer_bits) 5321 { 5322 if (old) { 5323 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5324 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5325 old->frontbuffer_bits &= ~frontbuffer_bits; 5326 } 5327 5328 if (new) { 5329 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5330 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5331 new->frontbuffer_bits |= frontbuffer_bits; 5332 } 5333 } 5334 5335 /* All the new VM stuff */ 5336 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5337 struct i915_address_space *vm) 5338 { 5339 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5340 struct i915_vma *vma; 5341 5342 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5343 5344 list_for_each_entry(vma, &o->vma_list, obj_link) { 5345 if (vma->is_ggtt && 5346 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5347 continue; 5348 if (vma->vm == vm) 5349 return vma->node.start; 5350 } 5351 5352 WARN(1, "%s vma for this object not found.\n", 5353 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5354 return -1; 5355 } 5356 5357 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5358 const struct i915_ggtt_view *view) 5359 { 5360 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5361 struct i915_vma *vma; 5362 5363 list_for_each_entry(vma, &o->vma_list, obj_link) 5364 if (vma->vm == ggtt && 5365 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5366 return vma->node.start; 5367 5368 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5369 return -1; 5370 } 5371 5372 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5373 struct i915_address_space *vm) 5374 { 5375 struct i915_vma *vma; 5376 5377 list_for_each_entry(vma, &o->vma_list, obj_link) { 5378 if (vma->is_ggtt && 5379 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5380 continue; 5381 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5382 return true; 5383 } 5384 5385 return false; 5386 } 5387 5388 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5389 const struct i915_ggtt_view *view) 5390 { 5391 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5392 struct i915_vma *vma; 5393 5394 list_for_each_entry(vma, &o->vma_list, obj_link) 5395 if (vma->vm == ggtt && 5396 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5397 drm_mm_node_allocated(&vma->node)) 5398 return true; 5399 5400 return false; 5401 } 5402 5403 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5404 { 5405 struct i915_vma *vma; 5406 5407 list_for_each_entry(vma, &o->vma_list, obj_link) 5408 if (drm_mm_node_allocated(&vma->node)) 5409 return true; 5410 5411 return false; 5412 } 5413 5414 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5415 struct i915_address_space *vm) 5416 { 5417 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5418 struct i915_vma *vma; 5419 5420 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5421 5422 BUG_ON(list_empty(&o->vma_list)); 5423 5424 list_for_each_entry(vma, &o->vma_list, obj_link) { 5425 if (vma->is_ggtt && 5426 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5427 continue; 5428 if (vma->vm == vm) 5429 return vma->node.size; 5430 } 5431 return 0; 5432 } 5433 5434 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5435 { 5436 struct i915_vma *vma; 5437 list_for_each_entry(vma, &obj->vma_list, obj_link) 5438 if (vma->pin_count > 0) 5439 return true; 5440 5441 return false; 5442 } 5443 5444 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 5445 struct vm_page * 5446 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 5447 { 5448 struct vm_page *page; 5449 5450 /* Only default objects have per-page dirty tracking */ 5451 if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) 5452 return NULL; 5453 5454 page = i915_gem_object_get_page(obj, n); 5455 set_page_dirty(page); 5456 return page; 5457 } 5458 5459 /* Allocate a new GEM object and fill it with the supplied data */ 5460 struct drm_i915_gem_object * 5461 i915_gem_object_create_from_data(struct drm_device *dev, 5462 const void *data, size_t size) 5463 { 5464 struct drm_i915_gem_object *obj; 5465 struct sg_table *sg; 5466 size_t bytes; 5467 int ret; 5468 5469 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5470 if (IS_ERR_OR_NULL(obj)) 5471 return obj; 5472 5473 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5474 if (ret) 5475 goto fail; 5476 5477 ret = i915_gem_object_get_pages(obj); 5478 if (ret) 5479 goto fail; 5480 5481 i915_gem_object_pin_pages(obj); 5482 sg = obj->pages; 
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, data, size);
	obj->dirty = 1;		/* Backing store is now out of date */
	i915_gem_object_unpin_pages(obj);

	if (WARN_ON(bytes != size)) {
		DRM_ERROR("Incomplete copy, wrote %zu of %zu\n", bytes, size);
		ret = -EFAULT;
		goto fail;
	}

	return obj;

fail:
	drm_gem_object_unreference(&obj->base);
	return ERR_PTR(ret);
}
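
/*
 * Illustrative sketch only, not part of the driver: a caller wanting to
 * upload a blob of data into a GEM object would be expected to use the
 * helper above roughly as follows, where "blob" and "blob_size" are
 * hypothetical placeholders for the caller's data.
 *
 *	struct drm_i915_gem_object *obj;
 *
 *	obj = i915_gem_object_create_from_data(dev, blob, blob_size);
 *	if (IS_ERR_OR_NULL(obj))
 *		return obj ? PTR_ERR(obj) : -ENOMEM;
 *
 *	(obj->pages now hold a copy of the blob and the object is in the
 *	CPU domain, ready to be pinned wherever the caller needs it.)
 */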