/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
75 */ 76 obj->fence_dirty = false; 77 obj->fence_reg = I915_FENCE_REG_NONE; 78 } 79 80 /* some bookkeeping */ 81 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 82 size_t size) 83 { 84 spin_lock(&dev_priv->mm.object_stat_lock); 85 dev_priv->mm.object_count++; 86 dev_priv->mm.object_memory += size; 87 spin_unlock(&dev_priv->mm.object_stat_lock); 88 } 89 90 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 91 size_t size) 92 { 93 spin_lock(&dev_priv->mm.object_stat_lock); 94 dev_priv->mm.object_count--; 95 dev_priv->mm.object_memory -= size; 96 spin_unlock(&dev_priv->mm.object_stat_lock); 97 } 98 99 static int 100 i915_gem_wait_for_error(struct i915_gpu_error *error) 101 { 102 int ret; 103 104 #define EXIT_COND (!i915_reset_in_progress(error) || \ 105 i915_terminally_wedged(error)) 106 if (EXIT_COND) 107 return 0; 108 109 /* 110 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 111 * userspace. If it takes that long something really bad is going on and 112 * we should simply try to bail out and fail as gracefully as possible. 113 */ 114 ret = wait_event_interruptible_timeout(error->reset_queue, 115 EXIT_COND, 116 10*HZ); 117 if (ret == 0) { 118 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 119 return -EIO; 120 } else if (ret < 0) { 121 return ret; 122 } 123 #undef EXIT_COND 124 125 return 0; 126 } 127 128 int i915_mutex_lock_interruptible(struct drm_device *dev) 129 { 130 struct drm_i915_private *dev_priv = dev->dev_private; 131 int ret; 132 133 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 134 if (ret) 135 return ret; 136 137 ret = mutex_lock_interruptible(&dev->struct_mutex); 138 if (ret) 139 return ret; 140 141 WARN_ON(i915_verify_lists(dev)); 142 return 0; 143 } 144 145 int 146 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 147 struct drm_file *file) 148 { 149 struct drm_i915_private *dev_priv = dev->dev_private; 150 struct drm_i915_gem_get_aperture *args = data; 151 struct drm_i915_gem_object *obj; 152 size_t pinned; 153 154 pinned = 0; 155 mutex_lock(&dev->struct_mutex); 156 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 157 if (i915_gem_obj_is_pinned(obj)) 158 pinned += i915_gem_obj_ggtt_size(obj); 159 mutex_unlock(&dev->struct_mutex); 160 161 args->aper_size = dev_priv->gtt.base.total; 162 args->aper_available_size = args->aper_size - pinned; 163 164 return 0; 165 } 166 167 #if 0 168 static int 169 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) 170 { 171 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 172 char *vaddr = obj->phys_handle->vaddr; 173 struct sg_table *st; 174 struct scatterlist *sg; 175 int i; 176 177 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) 178 return -EINVAL; 179 180 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 181 struct page *page; 182 char *src; 183 184 page = shmem_read_mapping_page(mapping, i); 185 if (IS_ERR(page)) 186 return PTR_ERR(page); 187 188 src = kmap_atomic(page); 189 memcpy(vaddr, src, PAGE_SIZE); 190 drm_clflush_virt_range(vaddr, PAGE_SIZE); 191 kunmap_atomic(src); 192 193 page_cache_release(page); 194 vaddr += PAGE_SIZE; 195 } 196 197 i915_gem_chipset_flush(obj->base.dev); 198 199 st = kmalloc(sizeof(*st), GFP_KERNEL); 200 if (st == NULL) 201 return -ENOMEM; 202 203 if (sg_alloc_table(st, 1, GFP_KERNEL)) { 204 kfree(st); 205 return -ENOMEM; 206 } 207 208 sg = st->sgl; 209 sg->offset = 0; 210 sg->length = obj->base.size; 211 212 sg_dma_address(sg) = 
obj->phys_handle->busaddr; 213 sg_dma_len(sg) = obj->base.size; 214 215 obj->pages = st; 216 return 0; 217 } 218 219 static void 220 i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) 221 { 222 int ret; 223 224 BUG_ON(obj->madv == __I915_MADV_PURGED); 225 226 ret = i915_gem_object_set_to_cpu_domain(obj, true); 227 if (ret) { 228 /* In the event of a disaster, abandon all caches and 229 * hope for the best. 230 */ 231 WARN_ON(ret != -EIO); 232 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 233 } 234 235 if (obj->madv == I915_MADV_DONTNEED) 236 obj->dirty = 0; 237 238 if (obj->dirty) { 239 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping; 240 char *vaddr = obj->phys_handle->vaddr; 241 int i; 242 243 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 244 struct page *page; 245 char *dst; 246 247 page = shmem_read_mapping_page(mapping, i); 248 if (IS_ERR(page)) 249 continue; 250 251 dst = kmap_atomic(page); 252 drm_clflush_virt_range(vaddr, PAGE_SIZE); 253 memcpy(dst, vaddr, PAGE_SIZE); 254 kunmap_atomic(dst); 255 256 set_page_dirty(page); 257 if (obj->madv == I915_MADV_WILLNEED) 258 mark_page_accessed(page); 259 page_cache_release(page); 260 vaddr += PAGE_SIZE; 261 } 262 obj->dirty = 0; 263 } 264 265 sg_free_table(obj->pages); 266 kfree(obj->pages); 267 } 268 269 static void 270 i915_gem_object_release_phys(struct drm_i915_gem_object *obj) 271 { 272 drm_pci_free(obj->base.dev, obj->phys_handle); 273 } 274 275 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { 276 .get_pages = i915_gem_object_get_pages_phys, 277 .put_pages = i915_gem_object_put_pages_phys, 278 .release = i915_gem_object_release_phys, 279 }; 280 #endif 281 282 static int 283 drop_pages(struct drm_i915_gem_object *obj) 284 { 285 struct i915_vma *vma, *next; 286 int ret; 287 288 drm_gem_object_reference(&obj->base); 289 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) 290 if (i915_vma_unbind(vma)) 291 break; 292 293 ret = i915_gem_object_put_pages(obj); 294 drm_gem_object_unreference(&obj->base); 295 296 return ret; 297 } 298 299 int 300 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 301 int align) 302 { 303 drm_dma_handle_t *phys; 304 int ret; 305 306 if (obj->phys_handle) { 307 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 308 return -EBUSY; 309 310 return 0; 311 } 312 313 if (obj->madv != I915_MADV_WILLNEED) 314 return -EFAULT; 315 316 #if 0 317 if (obj->base.filp == NULL) 318 return -EINVAL; 319 #endif 320 321 ret = drop_pages(obj); 322 if (ret) 323 return ret; 324 325 /* create a new object */ 326 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 327 if (!phys) 328 return -ENOMEM; 329 330 obj->phys_handle = phys; 331 #if 0 332 obj->ops = &i915_gem_phys_ops; 333 #endif 334 335 return i915_gem_object_get_pages(obj); 336 } 337 338 static int 339 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 340 struct drm_i915_gem_pwrite *args, 341 struct drm_file *file_priv) 342 { 343 struct drm_device *dev = obj->base.dev; 344 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 345 char __user *user_data = to_user_ptr(args->data_ptr); 346 int ret = 0; 347 348 /* We manually control the domain here and pretend that it 349 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
350 */ 351 ret = i915_gem_object_wait_rendering(obj, false); 352 if (ret) 353 return ret; 354 355 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 356 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 357 unsigned long unwritten; 358 359 /* The physical object once assigned is fixed for the lifetime 360 * of the obj, so we can safely drop the lock and continue 361 * to access vaddr. 362 */ 363 mutex_unlock(&dev->struct_mutex); 364 unwritten = copy_from_user(vaddr, user_data, args->size); 365 mutex_lock(&dev->struct_mutex); 366 if (unwritten) { 367 ret = -EFAULT; 368 goto out; 369 } 370 } 371 372 drm_clflush_virt_range(vaddr, args->size); 373 i915_gem_chipset_flush(dev); 374 375 out: 376 intel_fb_obj_flush(obj, false); 377 return ret; 378 } 379 380 void *i915_gem_object_alloc(struct drm_device *dev) 381 { 382 return kmalloc(sizeof(struct drm_i915_gem_object), 383 M_DRM, M_WAITOK | M_ZERO); 384 } 385 386 void i915_gem_object_free(struct drm_i915_gem_object *obj) 387 { 388 kfree(obj); 389 } 390 391 static int 392 i915_gem_create(struct drm_file *file, 393 struct drm_device *dev, 394 uint64_t size, 395 uint32_t *handle_p) 396 { 397 struct drm_i915_gem_object *obj; 398 int ret; 399 u32 handle; 400 401 size = roundup(size, PAGE_SIZE); 402 if (size == 0) 403 return -EINVAL; 404 405 /* Allocate the new object */ 406 obj = i915_gem_alloc_object(dev, size); 407 if (obj == NULL) 408 return -ENOMEM; 409 410 ret = drm_gem_handle_create(file, &obj->base, &handle); 411 /* drop reference from allocate - handle holds it now */ 412 drm_gem_object_unreference_unlocked(&obj->base); 413 if (ret) 414 return ret; 415 416 *handle_p = handle; 417 return 0; 418 } 419 420 int 421 i915_gem_dumb_create(struct drm_file *file, 422 struct drm_device *dev, 423 struct drm_mode_create_dumb *args) 424 { 425 /* have to work out size/pitch and return them */ 426 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 427 args->size = args->pitch * args->height; 428 return i915_gem_create(file, dev, 429 args->size, &args->handle); 430 } 431 432 /** 433 * Creates a new mm object and returns a handle to it. 
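 *
 * Illustrative userspace usage only (a hedged sketch, not a code path in
 * this file; drmIoctl() is the libdrm wrapper and the struct comes from
 * i915_drm.h):
 *
 *	struct drm_i915_gem_create create = { .size = 4096 };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 *	handle = create.handle;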
434 */ 435 int 436 i915_gem_create_ioctl(struct drm_device *dev, void *data, 437 struct drm_file *file) 438 { 439 struct drm_i915_gem_create *args = data; 440 441 return i915_gem_create(file, dev, 442 args->size, &args->handle); 443 } 444 445 static inline int 446 __copy_to_user_swizzled(char __user *cpu_vaddr, 447 const char *gpu_vaddr, int gpu_offset, 448 int length) 449 { 450 int ret, cpu_offset = 0; 451 452 while (length > 0) { 453 int cacheline_end = ALIGN(gpu_offset + 1, 64); 454 int this_length = min(cacheline_end - gpu_offset, length); 455 int swizzled_gpu_offset = gpu_offset ^ 64; 456 457 ret = __copy_to_user(cpu_vaddr + cpu_offset, 458 gpu_vaddr + swizzled_gpu_offset, 459 this_length); 460 if (ret) 461 return ret + length; 462 463 cpu_offset += this_length; 464 gpu_offset += this_length; 465 length -= this_length; 466 } 467 468 return 0; 469 } 470 471 static inline int 472 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 473 const char __user *cpu_vaddr, 474 int length) 475 { 476 int ret, cpu_offset = 0; 477 478 while (length > 0) { 479 int cacheline_end = ALIGN(gpu_offset + 1, 64); 480 int this_length = min(cacheline_end - gpu_offset, length); 481 int swizzled_gpu_offset = gpu_offset ^ 64; 482 483 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 484 cpu_vaddr + cpu_offset, 485 this_length); 486 if (ret) 487 return ret + length; 488 489 cpu_offset += this_length; 490 gpu_offset += this_length; 491 length -= this_length; 492 } 493 494 return 0; 495 } 496 497 /* 498 * Pins the specified object's pages and synchronizes the object with 499 * GPU accesses. Sets needs_clflush to non-zero if the caller should 500 * flush the object from the CPU cache. 501 */ 502 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 503 int *needs_clflush) 504 { 505 int ret; 506 507 *needs_clflush = 0; 508 509 #if 0 510 if (!obj->base.filp) 511 return -EINVAL; 512 #endif 513 514 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 515 /* If we're not in the cpu read domain, set ourself into the gtt 516 * read domain and manually flush cachelines (if required). This 517 * optimizes for the case when the gpu will dirty the data 518 * anyway again before the next pread happens. */ 519 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 520 obj->cache_level); 521 ret = i915_gem_object_wait_rendering(obj, true); 522 if (ret) 523 return ret; 524 } 525 526 ret = i915_gem_object_get_pages(obj); 527 if (ret) 528 return ret; 529 530 i915_gem_object_pin_pages(obj); 531 532 return ret; 533 } 534 535 /* Per-page copy function for the shmem pread fastpath. 536 * Flushes invalid cachelines before reading the target if 537 * needs_clflush is set. */ 538 static int 539 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 540 char __user *user_data, 541 bool page_do_bit17_swizzling, bool needs_clflush) 542 { 543 char *vaddr; 544 int ret; 545 546 if (unlikely(page_do_bit17_swizzling)) 547 return -EINVAL; 548 549 vaddr = kmap_atomic(page); 550 if (needs_clflush) 551 drm_clflush_virt_range(vaddr + shmem_page_offset, 552 page_length); 553 ret = __copy_to_user_inatomic(user_data, 554 vaddr + shmem_page_offset, 555 page_length); 556 kunmap_atomic(vaddr); 557 558 return ret ? 
-EFAULT : 0; 559 } 560 561 static void 562 shmem_clflush_swizzled_range(char *addr, unsigned long length, 563 bool swizzled) 564 { 565 if (unlikely(swizzled)) { 566 unsigned long start = (unsigned long) addr; 567 unsigned long end = (unsigned long) addr + length; 568 569 /* For swizzling simply ensure that we always flush both 570 * channels. Lame, but simple and it works. Swizzled 571 * pwrite/pread is far from a hotpath - current userspace 572 * doesn't use it at all. */ 573 start = round_down(start, 128); 574 end = round_up(end, 128); 575 576 drm_clflush_virt_range((void *)start, end - start); 577 } else { 578 drm_clflush_virt_range(addr, length); 579 } 580 581 } 582 583 /* Only difference to the fast-path function is that this can handle bit17 584 * and uses non-atomic copy and kmap functions. */ 585 static int 586 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 587 char __user *user_data, 588 bool page_do_bit17_swizzling, bool needs_clflush) 589 { 590 char *vaddr; 591 int ret; 592 593 vaddr = kmap(page); 594 if (needs_clflush) 595 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 596 page_length, 597 page_do_bit17_swizzling); 598 599 if (page_do_bit17_swizzling) 600 ret = __copy_to_user_swizzled(user_data, 601 vaddr, shmem_page_offset, 602 page_length); 603 else 604 ret = __copy_to_user(user_data, 605 vaddr + shmem_page_offset, 606 page_length); 607 kunmap(page); 608 609 return ret ? - EFAULT : 0; 610 } 611 612 static int 613 i915_gem_shmem_pread(struct drm_device *dev, 614 struct drm_i915_gem_object *obj, 615 struct drm_i915_gem_pread *args, 616 struct drm_file *file) 617 { 618 char __user *user_data; 619 ssize_t remain; 620 loff_t offset; 621 int shmem_page_offset, page_length, ret = 0; 622 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 623 int prefaulted = 0; 624 int needs_clflush = 0; 625 struct sg_page_iter sg_iter; 626 627 user_data = to_user_ptr(args->data_ptr); 628 remain = args->size; 629 630 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 631 632 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 633 if (ret) 634 return ret; 635 636 offset = args->offset; 637 638 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 639 offset >> PAGE_SHIFT) { 640 struct vm_page *page = sg_page_iter_page(&sg_iter); 641 642 if (remain <= 0) 643 break; 644 645 /* Operation in this page 646 * 647 * shmem_page_offset = offset within page in shmem file 648 * page_length = bytes to copy for this page 649 */ 650 shmem_page_offset = offset_in_page(offset); 651 page_length = remain; 652 if ((shmem_page_offset + page_length) > PAGE_SIZE) 653 page_length = PAGE_SIZE - shmem_page_offset; 654 655 page_do_bit17_swizzling = obj_do_bit17_swizzling && 656 (page_to_phys(page) & (1 << 17)) != 0; 657 658 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 659 user_data, page_do_bit17_swizzling, 660 needs_clflush); 661 if (ret == 0) 662 goto next_page; 663 664 mutex_unlock(&dev->struct_mutex); 665 666 if (likely(!i915.prefault_disable) && !prefaulted) { 667 ret = fault_in_multipages_writeable(user_data, remain); 668 /* Userspace is tricking us, but we've already clobbered 669 * its pages with the prefault and promised to write the 670 * data up to the first fault. Hence ignore any errors 671 * and just continue. 
*/ 672 (void)ret; 673 prefaulted = 1; 674 } 675 676 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 677 user_data, page_do_bit17_swizzling, 678 needs_clflush); 679 680 mutex_lock(&dev->struct_mutex); 681 682 if (ret) 683 goto out; 684 685 next_page: 686 remain -= page_length; 687 user_data += page_length; 688 offset += page_length; 689 } 690 691 out: 692 i915_gem_object_unpin_pages(obj); 693 694 return ret; 695 } 696 697 /** 698 * Reads data from the object referenced by handle. 699 * 700 * On error, the contents of *data are undefined. 701 */ 702 int 703 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 704 struct drm_file *file) 705 { 706 struct drm_i915_gem_pread *args = data; 707 struct drm_i915_gem_object *obj; 708 int ret = 0; 709 710 if (args->size == 0) 711 return 0; 712 713 ret = i915_mutex_lock_interruptible(dev); 714 if (ret) 715 return ret; 716 717 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 718 if (&obj->base == NULL) { 719 ret = -ENOENT; 720 goto unlock; 721 } 722 723 /* Bounds check source. */ 724 if (args->offset > obj->base.size || 725 args->size > obj->base.size - args->offset) { 726 ret = -EINVAL; 727 goto out; 728 } 729 730 /* prime objects have no backing filp to GEM pread/pwrite 731 * pages from. 732 */ 733 734 trace_i915_gem_object_pread(obj, args->offset, args->size); 735 736 ret = i915_gem_shmem_pread(dev, obj, args, file); 737 738 out: 739 drm_gem_object_unreference(&obj->base); 740 unlock: 741 mutex_unlock(&dev->struct_mutex); 742 return ret; 743 } 744 745 /* This is the fast write path which cannot handle 746 * page faults in the source data 747 */ 748 749 static inline int 750 fast_user_write(struct io_mapping *mapping, 751 loff_t page_base, int page_offset, 752 char __user *user_data, 753 int length) 754 { 755 void __iomem *vaddr_atomic; 756 void *vaddr; 757 unsigned long unwritten; 758 759 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 760 /* We can use the cpu mem copy function because this is X86. */ 761 vaddr = (char __force*)vaddr_atomic + page_offset; 762 unwritten = __copy_from_user_inatomic_nocache(vaddr, 763 user_data, length); 764 io_mapping_unmap_atomic(vaddr_atomic); 765 return unwritten; 766 } 767 768 /** 769 * This is the fast pwrite path, where we copy the data directly from the 770 * user into the GTT, uncached. 
771 */ 772 static int 773 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 774 struct drm_i915_gem_object *obj, 775 struct drm_i915_gem_pwrite *args, 776 struct drm_file *file) 777 { 778 struct drm_i915_private *dev_priv = dev->dev_private; 779 ssize_t remain; 780 loff_t offset, page_base; 781 char __user *user_data; 782 int page_offset, page_length, ret; 783 784 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 785 if (ret) 786 goto out; 787 788 ret = i915_gem_object_set_to_gtt_domain(obj, true); 789 if (ret) 790 goto out_unpin; 791 792 ret = i915_gem_object_put_fence(obj); 793 if (ret) 794 goto out_unpin; 795 796 user_data = to_user_ptr(args->data_ptr); 797 remain = args->size; 798 799 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 800 801 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 802 803 while (remain > 0) { 804 /* Operation in this page 805 * 806 * page_base = page offset within aperture 807 * page_offset = offset within page 808 * page_length = bytes to copy for this page 809 */ 810 page_base = offset & ~PAGE_MASK; 811 page_offset = offset_in_page(offset); 812 page_length = remain; 813 if ((page_offset + remain) > PAGE_SIZE) 814 page_length = PAGE_SIZE - page_offset; 815 816 /* If we get a fault while copying data, then (presumably) our 817 * source page isn't available. Return the error and we'll 818 * retry in the slow path. 819 */ 820 if (fast_user_write(dev_priv->gtt.mappable, page_base, 821 page_offset, user_data, page_length)) { 822 ret = -EFAULT; 823 goto out_flush; 824 } 825 826 remain -= page_length; 827 user_data += page_length; 828 offset += page_length; 829 } 830 831 out_flush: 832 intel_fb_obj_flush(obj, false); 833 out_unpin: 834 i915_gem_object_ggtt_unpin(obj); 835 out: 836 return ret; 837 } 838 839 /* Per-page copy function for the shmem pwrite fastpath. 840 * Flushes invalid cachelines before writing to the target if 841 * needs_clflush_before is set and flushes out any written cachelines after 842 * writing if needs_clflush is set. */ 843 static int 844 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 845 char __user *user_data, 846 bool page_do_bit17_swizzling, 847 bool needs_clflush_before, 848 bool needs_clflush_after) 849 { 850 char *vaddr; 851 int ret; 852 853 if (unlikely(page_do_bit17_swizzling)) 854 return -EINVAL; 855 856 vaddr = kmap_atomic(page); 857 if (needs_clflush_before) 858 drm_clflush_virt_range(vaddr + shmem_page_offset, 859 page_length); 860 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 861 user_data, page_length); 862 if (needs_clflush_after) 863 drm_clflush_virt_range(vaddr + shmem_page_offset, 864 page_length); 865 kunmap_atomic(vaddr); 866 867 return ret ? -EFAULT : 0; 868 } 869 870 /* Only difference to the fast-path function is that this can handle bit17 871 * and uses non-atomic copy and kmap functions. 
*/ 872 static int 873 shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length, 874 char __user *user_data, 875 bool page_do_bit17_swizzling, 876 bool needs_clflush_before, 877 bool needs_clflush_after) 878 { 879 char *vaddr; 880 int ret; 881 882 vaddr = kmap(page); 883 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 884 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 885 page_length, 886 page_do_bit17_swizzling); 887 if (page_do_bit17_swizzling) 888 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 889 user_data, 890 page_length); 891 else 892 ret = __copy_from_user(vaddr + shmem_page_offset, 893 user_data, 894 page_length); 895 if (needs_clflush_after) 896 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 897 page_length, 898 page_do_bit17_swizzling); 899 kunmap(page); 900 901 return ret ? -EFAULT : 0; 902 } 903 904 static int 905 i915_gem_shmem_pwrite(struct drm_device *dev, 906 struct drm_i915_gem_object *obj, 907 struct drm_i915_gem_pwrite *args, 908 struct drm_file *file) 909 { 910 ssize_t remain; 911 loff_t offset; 912 char __user *user_data; 913 int shmem_page_offset, page_length, ret = 0; 914 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 915 int hit_slowpath = 0; 916 int needs_clflush_after = 0; 917 int needs_clflush_before = 0; 918 struct sg_page_iter sg_iter; 919 920 user_data = to_user_ptr(args->data_ptr); 921 remain = args->size; 922 923 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 924 925 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 926 /* If we're not in the cpu write domain, set ourself into the gtt 927 * write domain and manually flush cachelines (if required). This 928 * optimizes for the case when the gpu will use the data 929 * right away and we therefore have to clflush anyway. */ 930 needs_clflush_after = cpu_write_needs_clflush(obj); 931 ret = i915_gem_object_wait_rendering(obj, false); 932 if (ret) 933 return ret; 934 } 935 /* Same trick applies to invalidate partially written cachelines read 936 * before writing. */ 937 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 938 needs_clflush_before = 939 !cpu_cache_is_coherent(dev, obj->cache_level); 940 941 ret = i915_gem_object_get_pages(obj); 942 if (ret) 943 return ret; 944 945 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 946 947 i915_gem_object_pin_pages(obj); 948 949 offset = args->offset; 950 obj->dirty = 1; 951 952 VM_OBJECT_LOCK(obj->base.vm_obj); 953 vm_object_pip_add(obj->base.vm_obj, 1); 954 955 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 956 offset >> PAGE_SHIFT) { 957 struct vm_page *page = sg_page_iter_page(&sg_iter); 958 int partial_cacheline_write; 959 960 if (remain <= 0) 961 break; 962 963 /* Operation in this page 964 * 965 * shmem_page_offset = offset within page in shmem file 966 * page_length = bytes to copy for this page 967 */ 968 shmem_page_offset = offset_in_page(offset); 969 970 page_length = remain; 971 if ((shmem_page_offset + page_length) > PAGE_SIZE) 972 page_length = PAGE_SIZE - shmem_page_offset; 973 974 /* If we don't overwrite a cacheline completely we need to be 975 * careful to have up-to-date data by first clflushing. Don't 976 * overcomplicate things and flush the entire patch. 
*/ 977 partial_cacheline_write = needs_clflush_before && 978 ((shmem_page_offset | page_length) 979 & (cpu_clflush_line_size - 1)); 980 981 page_do_bit17_swizzling = obj_do_bit17_swizzling && 982 (page_to_phys(page) & (1 << 17)) != 0; 983 984 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 985 user_data, page_do_bit17_swizzling, 986 partial_cacheline_write, 987 needs_clflush_after); 988 if (ret == 0) 989 goto next_page; 990 991 hit_slowpath = 1; 992 mutex_unlock(&dev->struct_mutex); 993 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 994 user_data, page_do_bit17_swizzling, 995 partial_cacheline_write, 996 needs_clflush_after); 997 998 mutex_lock(&dev->struct_mutex); 999 1000 if (ret) 1001 goto out; 1002 1003 next_page: 1004 remain -= page_length; 1005 user_data += page_length; 1006 offset += page_length; 1007 } 1008 vm_object_pip_wakeup(obj->base.vm_obj); 1009 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1010 1011 out: 1012 i915_gem_object_unpin_pages(obj); 1013 1014 if (hit_slowpath) { 1015 /* 1016 * Fixup: Flush cpu caches in case we didn't flush the dirty 1017 * cachelines in-line while writing and the object moved 1018 * out of the cpu write domain while we've dropped the lock. 1019 */ 1020 if (!needs_clflush_after && 1021 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1022 if (i915_gem_clflush_object(obj, obj->pin_display)) 1023 i915_gem_chipset_flush(dev); 1024 } 1025 } 1026 1027 if (needs_clflush_after) 1028 i915_gem_chipset_flush(dev); 1029 1030 intel_fb_obj_flush(obj, false); 1031 return ret; 1032 } 1033 1034 /** 1035 * Writes data to the object referenced by handle. 1036 * 1037 * On error, the contents of the buffer that were to be modified are undefined. 1038 */ 1039 int 1040 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1041 struct drm_file *file) 1042 { 1043 struct drm_i915_private *dev_priv = dev->dev_private; 1044 struct drm_i915_gem_pwrite *args = data; 1045 struct drm_i915_gem_object *obj; 1046 int ret; 1047 1048 if (args->size == 0) 1049 return 0; 1050 1051 if (likely(!i915.prefault_disable)) { 1052 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1053 args->size); 1054 if (ret) 1055 return -EFAULT; 1056 } 1057 1058 intel_runtime_pm_get(dev_priv); 1059 1060 ret = i915_mutex_lock_interruptible(dev); 1061 if (ret) 1062 goto put_rpm; 1063 1064 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1065 if (&obj->base == NULL) { 1066 ret = -ENOENT; 1067 goto unlock; 1068 } 1069 1070 /* Bounds check destination. */ 1071 if (args->offset > obj->base.size || 1072 args->size > obj->base.size - args->offset) { 1073 ret = -EINVAL; 1074 goto out; 1075 } 1076 1077 /* prime objects have no backing filp to GEM pread/pwrite 1078 * pages from. 1079 */ 1080 1081 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1082 1083 ret = -EFAULT; 1084 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1085 * it would end up going through the fenced access, and we'll get 1086 * different detiling behavior between reading and writing. 1087 * pread/pwrite currently are reading and writing from the CPU 1088 * perspective, requiring manual detiling by the client. 1089 */ 1090 if (obj->tiling_mode == I915_TILING_NONE && 1091 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1092 cpu_write_needs_clflush(obj)) { 1093 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1094 /* Note that the gtt paths might fail with non-page-backed user 1095 * pointers (e.g. gtt mappings when moving data between 1096 * textures). 
Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

	return 0;
}

/*
 * Compare arbitrary request against outstanding lazy request. Emit on match.
 */
int
i915_gem_check_olr(struct drm_i915_gem_request *req)
{
	int ret;

	WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex));

	ret = 0;
	if (req == req->ring->outstanding_lazy_request)
		ret = i915_add_request(req->ring);

	return ret;
}

static void fake_irq(unsigned long data)
{
	wakeup_one((void *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

#if 0
static int __i915_spin_request(struct drm_i915_gem_request *req)
{
	unsigned long timeout;

	if (i915_gem_request_get_ring(req)->irq_refcount)
		return -EBUSY;

	timeout = jiffies + 1;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (time_after_eq(jiffies, timeout))
			break;

		cpu_relax_lowlatency();
	}
	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}
#endif

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: the request to wait on
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
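 *
 * Minimal in-driver usage sketch (illustrative only; assumes the caller
 * holds a reference on @req and sampled reset_counter under struct_mutex,
 * as i915_wait_request() below does):
 *
 *	ret = __i915_wait_request(req,
 *				  atomic_read(&dev_priv->gpu_error.reset_counter),
 *				  dev_priv->mm.interruptible, NULL, NULL);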
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			unsigned reset_counter,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
	unsigned long timeout_expire;
	s64 before, now;
	int ret, sl_timeout = 1;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

	timeout_expire = timeout ?
		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(req);
	before = ktime_get_raw_ns();

	/* Optimistic spin for the next jiffy before touching IRQs */
#if 0
	ret = __i915_spin_request(req);
	if (ret == 0)
		goto out;
#endif

	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
		ret = -ENODEV;
		goto out;
	}

	lockmgr(&ring->irq_queue.lock, LK_EXCLUSIVE);
	for (;;) {
		struct timer_list timer;

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (interruptible && signal_pending(curthread->td_lwp)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, ring)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)&ring->irq_queue);
			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
			sl_timeout = expire - jiffies;
			if (sl_timeout < 1)
				sl_timeout = 1;
			mod_timer(&timer, expire);
		}

#if 0
		io_schedule();
#endif

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}

		lksleep(&ring->irq_queue, &ring->irq_queue.lock,
			interruptible ? PCATCH : 0, "lwe", sl_timeout);
	}
	lockmgr(&ring->irq_queue.lock, LK_RELEASE);
	if (!irq_test_in_progress)
		ring->irq_put(ring);

out:
	now = ktime_get_raw_ns();
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (now - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
1330 */ 1331 if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) 1332 *timeout = 0; 1333 } 1334 1335 return ret; 1336 } 1337 1338 static inline void 1339 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1340 { 1341 struct drm_i915_file_private *file_priv = request->file_priv; 1342 1343 if (!file_priv) 1344 return; 1345 1346 spin_lock(&file_priv->mm.lock); 1347 list_del(&request->client_list); 1348 request->file_priv = NULL; 1349 spin_unlock(&file_priv->mm.lock); 1350 } 1351 1352 static void i915_gem_request_retire(struct drm_i915_gem_request *request) 1353 { 1354 trace_i915_gem_request_retire(request); 1355 1356 /* We know the GPU must have read the request to have 1357 * sent us the seqno + interrupt, so use the position 1358 * of tail of the request to update the last known position 1359 * of the GPU head. 1360 * 1361 * Note this requires that we are always called in request 1362 * completion order. 1363 */ 1364 request->ringbuf->last_retired_head = request->postfix; 1365 1366 list_del_init(&request->list); 1367 i915_gem_request_remove_from_client(request); 1368 1369 #if 0 1370 put_pid(request->pid); 1371 #endif 1372 1373 i915_gem_request_unreference(request); 1374 } 1375 1376 static void 1377 __i915_gem_request_retire__upto(struct drm_i915_gem_request *req) 1378 { 1379 struct intel_engine_cs *engine = req->ring; 1380 struct drm_i915_gem_request *tmp; 1381 1382 lockdep_assert_held(&engine->dev->struct_mutex); 1383 1384 if (list_empty(&req->list)) 1385 return; 1386 1387 do { 1388 tmp = list_first_entry(&engine->request_list, 1389 typeof(*tmp), list); 1390 1391 i915_gem_request_retire(tmp); 1392 } while (tmp != req); 1393 1394 WARN_ON(i915_verify_lists(engine->dev)); 1395 } 1396 1397 /** 1398 * Waits for a request to be signaled, and cleans up the 1399 * request and object lists appropriately for that event. 1400 */ 1401 int 1402 i915_wait_request(struct drm_i915_gem_request *req) 1403 { 1404 struct drm_device *dev; 1405 struct drm_i915_private *dev_priv; 1406 bool interruptible; 1407 int ret; 1408 1409 BUG_ON(req == NULL); 1410 1411 dev = req->ring->dev; 1412 dev_priv = dev->dev_private; 1413 interruptible = dev_priv->mm.interruptible; 1414 1415 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1416 1417 ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible); 1418 if (ret) 1419 return ret; 1420 1421 ret = i915_gem_check_olr(req); 1422 if (ret) 1423 return ret; 1424 1425 ret = __i915_wait_request(req, 1426 atomic_read(&dev_priv->gpu_error.reset_counter), 1427 interruptible, NULL, NULL); 1428 if (ret) 1429 return ret; 1430 1431 __i915_gem_request_retire__upto(req); 1432 return 0; 1433 } 1434 1435 /** 1436 * Ensures that all rendering to the object has completed and the object is 1437 * safe to unbind from the GTT or access from the CPU. 
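 *
 * Hedged usage sketch (mirrors the set-domain path below), with
 * struct_mutex held:
 *
 *	ret = i915_gem_object_wait_rendering(obj, !write);
 *	if (ret)
 *		return ret;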
1438 */ 1439 int 1440 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1441 bool readonly) 1442 { 1443 int ret, i; 1444 1445 if (!obj->active) 1446 return 0; 1447 1448 if (readonly) { 1449 if (obj->last_write_req != NULL) { 1450 ret = i915_wait_request(obj->last_write_req); 1451 if (ret) 1452 return ret; 1453 1454 i = obj->last_write_req->ring->id; 1455 if (obj->last_read_req[i] == obj->last_write_req) 1456 i915_gem_object_retire__read(obj, i); 1457 else 1458 i915_gem_object_retire__write(obj); 1459 } 1460 } else { 1461 for (i = 0; i < I915_NUM_RINGS; i++) { 1462 if (obj->last_read_req[i] == NULL) 1463 continue; 1464 1465 ret = i915_wait_request(obj->last_read_req[i]); 1466 if (ret) 1467 return ret; 1468 1469 i915_gem_object_retire__read(obj, i); 1470 } 1471 RQ_BUG_ON(obj->active); 1472 } 1473 1474 return 0; 1475 } 1476 1477 static void 1478 i915_gem_object_retire_request(struct drm_i915_gem_object *obj, 1479 struct drm_i915_gem_request *req) 1480 { 1481 int ring = req->ring->id; 1482 1483 if (obj->last_read_req[ring] == req) 1484 i915_gem_object_retire__read(obj, ring); 1485 else if (obj->last_write_req == req) 1486 i915_gem_object_retire__write(obj); 1487 1488 __i915_gem_request_retire__upto(req); 1489 } 1490 1491 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1492 * as the object state may change during this call. 1493 */ 1494 static __must_check int 1495 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1496 struct intel_rps_client *rps, 1497 bool readonly) 1498 { 1499 struct drm_device *dev = obj->base.dev; 1500 struct drm_i915_private *dev_priv = dev->dev_private; 1501 struct drm_i915_gem_request *requests[I915_NUM_RINGS]; 1502 unsigned reset_counter; 1503 int ret, i, n = 0; 1504 1505 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1506 BUG_ON(!dev_priv->mm.interruptible); 1507 1508 if (!obj->active) 1509 return 0; 1510 1511 ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); 1512 if (ret) 1513 return ret; 1514 1515 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 1516 1517 if (readonly) { 1518 struct drm_i915_gem_request *req; 1519 1520 req = obj->last_write_req; 1521 if (req == NULL) 1522 return 0; 1523 1524 ret = i915_gem_check_olr(req); 1525 if (ret) 1526 goto err; 1527 1528 requests[n++] = i915_gem_request_reference(req); 1529 } else { 1530 for (i = 0; i < I915_NUM_RINGS; i++) { 1531 struct drm_i915_gem_request *req; 1532 1533 req = obj->last_read_req[i]; 1534 if (req == NULL) 1535 continue; 1536 1537 ret = i915_gem_check_olr(req); 1538 if (ret) 1539 goto err; 1540 1541 requests[n++] = i915_gem_request_reference(req); 1542 } 1543 } 1544 1545 mutex_unlock(&dev->struct_mutex); 1546 for (i = 0; ret == 0 && i < n; i++) 1547 ret = __i915_wait_request(requests[i], reset_counter, true, 1548 NULL, rps); 1549 mutex_lock(&dev->struct_mutex); 1550 1551 err: 1552 for (i = 0; i < n; i++) { 1553 if (ret == 0) 1554 i915_gem_object_retire_request(obj, requests[i]); 1555 i915_gem_request_unreference(requests[i]); 1556 } 1557 1558 return ret; 1559 } 1560 1561 static struct intel_rps_client *to_rps_client(struct drm_file *file) 1562 { 1563 struct drm_i915_file_private *fpriv = file->driver_priv; 1564 return &fpriv->rps; 1565 } 1566 1567 /** 1568 * Called when user space prepares to use an object with the CPU, either 1569 * through the mmap ioctl's mapping or a GTT mapping. 
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it
 * doesn't imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do
 * GEM mmap support, please don't implement mmap support like here. The modern
 * way to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on;
 * hiding the mmap call in a driver private ioctl will break that. The i915
 * driver only does cpu mmaps this way because we didn't know better.
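 *
 * For contrast, a hedged sketch of that preferred mmap-offset flow as seen
 * from userspace (drmIoctl() from libdrm, error handling omitted):
 *
 *	struct drm_i915_gem_mmap_gtt mg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mg);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, mg.offset);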
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	struct proc *p = curproc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_size_t size;
	int error = 0, rv;

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	if (args->size == 0)
		goto out;

	size = round_page(args->size);
	if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
		error = -ENOMEM;
		goto out;
	}

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */

	/*
	 * Call hint to ensure that NULL is not returned as a valid address
	 * and to reduce vm_map traversals. XXX causes instability, use a
	 * fixed low address as the start point instead to avoid the NULL
	 * return issue.
	 */
	addr = PAGE_SIZE;

	/*
	 * Use 256KB alignment.  It is unclear why this matters for a
	 * virtual address but it appears to fix a number of application/X
	 * crashes and kms console switching is much faster.
	 */
	vm_object_hold(obj->vm_obj);
	vm_object_reference_locked(obj->vm_obj);
	vm_object_drop(obj->vm_obj);

	rv = vm_map_find(map, obj->vm_obj, NULL,
			 args->offset, &addr, args->size,
			 256 * 1024, /* align */
			 TRUE, /* fitit */
			 VM_MAPTYPE_NORMAL, /* maptype */
			 VM_PROT_READ | VM_PROT_WRITE, /* prot */
			 VM_PROT_READ | VM_PROT_WRITE, /* max */
			 MAP_SHARED /* cow */);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(obj->vm_obj);
		error = -vm_mmap_to_errno(rv);
	} else {
		args->addr_ptr = (uint64_t)addr;
	}
out:
	drm_gem_object_unreference(obj);
	return (error);
}

/**
 * i915_gem_fault - fault a page into the GTT
 *
 * vm_obj is locked on entry and expected to be locked on return.
 *
 * The vm_pager has placemarked the object with an anonymous memory page
 * which we must replace atomically to avoid races against concurrent faults
 * on the same page.  XXX we currently are unable to do this atomically.
 *
 * If we are to return an error we should not touch the anonymous page,
 * the caller will deallocate it.
 *
 * XXX Most GEM calls appear to be interruptible, but we can't hard loop
 * in that case.  Release all resources and wait 1 tick before retrying.
 * This is a huge problem which needs to be fixed by getting rid of most
 * of the interruptibility.  The linux code does not retry but does appear
 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level
 * to be able to retry.
 *
 * --
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.
So performance may 1775 * suffer if the GTT working set is large or there are few fence registers 1776 * left. 1777 * 1778 * vm_obj is locked on entry and expected to be locked on return. The VM 1779 * pager has placed an anonymous memory page at (obj,offset) which we have 1780 * to replace. 1781 */ 1782 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1783 { 1784 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1785 struct drm_device *dev = obj->base.dev; 1786 struct drm_i915_private *dev_priv = dev->dev_private; 1787 struct i915_ggtt_view view = i915_ggtt_view_normal; 1788 unsigned long page_offset; 1789 vm_page_t m, oldm = NULL; 1790 int ret = 0; 1791 bool write = !!(prot & VM_PROT_WRITE); 1792 1793 intel_runtime_pm_get(dev_priv); 1794 1795 /* We don't use vmf->pgoff since that has the fake offset */ 1796 page_offset = (unsigned long)offset; 1797 1798 retry: 1799 ret = i915_mutex_lock_interruptible(dev); 1800 if (ret) 1801 goto out; 1802 1803 trace_i915_gem_object_fault(obj, page_offset, true, write); 1804 1805 /* Try to flush the object off the GPU first without holding the lock. 1806 * Upon reacquiring the lock, we will perform our sanity checks and then 1807 * repeat the flush holding the lock in the normal manner to catch cases 1808 * where we are gazumped. 1809 */ 1810 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 1811 if (ret) 1812 goto unlock; 1813 1814 /* Access to snoopable pages through the GTT is incoherent. */ 1815 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1816 ret = -EFAULT; 1817 goto unlock; 1818 } 1819 1820 /* Use a partial view if the object is bigger than the aperture. */ 1821 if (obj->base.size >= dev_priv->gtt.mappable_end && 1822 obj->tiling_mode == I915_TILING_NONE) { 1823 #if 0 1824 static const unsigned int chunk_size = 256; // 1 MiB 1825 1826 memset(&view, 0, sizeof(view)); 1827 view.type = I915_GGTT_VIEW_PARTIAL; 1828 view.params.partial.offset = rounddown(page_offset, chunk_size); 1829 view.params.partial.size = 1830 min_t(unsigned int, 1831 chunk_size, 1832 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1833 view.params.partial.offset); 1834 #endif 1835 } 1836 1837 /* Now pin it into the GTT if needed */ 1838 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 1839 if (ret) 1840 goto unlock; 1841 1842 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1843 if (ret) 1844 goto unpin; 1845 1846 ret = i915_gem_object_get_fence(obj); 1847 if (ret) 1848 goto unpin; 1849 1850 /* 1851 * START FREEBSD MAGIC 1852 * 1853 * Add a pip count to avoid destruction and certain other 1854 * complex operations (such as collapses?) while unlocked. 1855 */ 1856 vm_object_pip_add(vm_obj, 1); 1857 1858 /* 1859 * XXX We must currently remove the placeholder page now to avoid 1860 * a deadlock against a concurrent i915_gem_release_mmap(). 1861 * Otherwise concurrent operation will block on the busy page 1862 * while holding locks which we need to obtain. 1863 */ 1864 if (*mres != NULL) { 1865 oldm = *mres; 1866 if ((oldm->flags & PG_BUSY) == 0) 1867 kprintf("i915_gem_fault: Page was not busy\n"); 1868 else 1869 vm_page_remove(oldm); 1870 *mres = NULL; 1871 } else { 1872 oldm = NULL; 1873 } 1874 1875 ret = 0; 1876 m = NULL; 1877 1878 /* 1879 * Since the object lock was dropped, another thread might have 1880 * faulted on the same GTT address and instantiated the mapping. 1881 * Recheck. 
1882 */ 1883 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1884 if (m != NULL) { 1885 /* 1886 * Try to busy the page, retry on failure (non-zero ret). 1887 */ 1888 if (vm_page_busy_try(m, false)) { 1889 kprintf("i915_gem_fault: PG_BUSY\n"); 1890 ret = -EINTR; 1891 goto unlock; 1892 } 1893 goto have_page; 1894 } 1895 /* 1896 * END FREEBSD MAGIC 1897 */ 1898 1899 obj->fault_mappable = true; 1900 1901 /* Finally, remap it using the new GTT offset */ 1902 m = vm_phys_fictitious_to_vm_page(dev_priv->gtt.mappable_base + 1903 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1904 if (m == NULL) { 1905 ret = -EFAULT; 1906 goto unpin; 1907 } 1908 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1909 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1910 1911 /* 1912 * Try to busy the page. Fails on non-zero return. 1913 */ 1914 if (vm_page_busy_try(m, false)) { 1915 kprintf("i915_gem_fault: PG_BUSY(2)\n"); 1916 ret = -EINTR; 1917 goto unpin; 1918 } 1919 m->valid = VM_PAGE_BITS_ALL; 1920 1921 #if 0 1922 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1923 /* Overriding existing pages in partial view does not cause 1924 * us any trouble as TLBs are still valid because the fault 1925 * is due to userspace losing part of the mapping or never 1926 * having accessed it before (at this partials' range). 1927 */ 1928 unsigned long base = vma->vm_start + 1929 (view.params.partial.offset << PAGE_SHIFT); 1930 unsigned int i; 1931 1932 for (i = 0; i < view.params.partial.size; i++) { 1933 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1934 if (ret) 1935 break; 1936 } 1937 1938 obj->fault_mappable = true; 1939 } else { 1940 if (!obj->fault_mappable) { 1941 unsigned long size = min_t(unsigned long, 1942 vma->vm_end - vma->vm_start, 1943 obj->base.size); 1944 int i; 1945 1946 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1947 ret = vm_insert_pfn(vma, 1948 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1949 pfn + i); 1950 if (ret) 1951 break; 1952 } 1953 1954 obj->fault_mappable = true; 1955 } else 1956 ret = vm_insert_pfn(vma, 1957 (unsigned long)vmf->virtual_address, 1958 pfn + page_offset); 1959 #endif 1960 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 1961 #if 0 1962 } 1963 #endif 1964 1965 have_page: 1966 *mres = m; 1967 1968 i915_gem_object_ggtt_unpin_view(obj, &view); 1969 mutex_unlock(&dev->struct_mutex); 1970 ret = VM_PAGER_OK; 1971 goto done; 1972 1973 /* 1974 * ALTERNATIVE ERROR RETURN. 1975 * 1976 * OBJECT EXPECTED TO BE LOCKED. 1977 */ 1978 unpin: 1979 i915_gem_object_ggtt_unpin_view(obj, &view); 1980 unlock: 1981 mutex_unlock(&dev->struct_mutex); 1982 out: 1983 switch (ret) { 1984 case -EIO: 1985 /* 1986 * We eat errors when the gpu is terminally wedged to avoid 1987 * userspace unduly crashing (gl has no provisions for mmaps to 1988 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1989 * and so needs to be reported. 1990 */ 1991 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1992 // ret = VM_FAULT_SIGBUS; 1993 break; 1994 } 1995 case -EAGAIN: 1996 /* 1997 * EAGAIN means the gpu is hung and we'll wait for the error 1998 * handler to reset everything when re-faulting in 1999 * i915_mutex_lock_interruptible. 
 */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

done:
	if (oldm != NULL)
		vm_page_free(oldm);
	vm_object_pip_wakeup(vm_obj);

	intel_runtime_pm_put(dev_priv);
	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
			   int tiling_mode, bool fenced)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
2117 */ 2118 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2119 } 2120 2121 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2122 { 2123 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2124 int ret; 2125 2126 #if 0 2127 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2128 return 0; 2129 #endif 2130 2131 dev_priv->mm.shrinker_no_lock_stealing = true; 2132 2133 ret = drm_gem_create_mmap_offset(&obj->base); 2134 if (ret != -ENOSPC) 2135 goto out; 2136 2137 /* Badly fragmented mmap space? The only way we can recover 2138 * space is by destroying unwanted objects. We can't randomly release 2139 * mmap_offsets as userspace expects them to be persistent for the 2140 * lifetime of the objects. The closest we can is to release the 2141 * offsets on purgeable objects by truncating it and marking it purged, 2142 * which prevents userspace from ever using that object again. 2143 */ 2144 i915_gem_shrink(dev_priv, 2145 obj->base.size >> PAGE_SHIFT, 2146 I915_SHRINK_BOUND | 2147 I915_SHRINK_UNBOUND | 2148 I915_SHRINK_PURGEABLE); 2149 ret = drm_gem_create_mmap_offset(&obj->base); 2150 if (ret != -ENOSPC) 2151 goto out; 2152 2153 i915_gem_shrink_all(dev_priv); 2154 ret = drm_gem_create_mmap_offset(&obj->base); 2155 out: 2156 dev_priv->mm.shrinker_no_lock_stealing = false; 2157 2158 return ret; 2159 } 2160 2161 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2162 { 2163 drm_gem_free_mmap_offset(&obj->base); 2164 } 2165 2166 int 2167 i915_gem_mmap_gtt(struct drm_file *file, 2168 struct drm_device *dev, 2169 uint32_t handle, 2170 uint64_t *offset) 2171 { 2172 struct drm_i915_gem_object *obj; 2173 int ret; 2174 2175 ret = i915_mutex_lock_interruptible(dev); 2176 if (ret) 2177 return ret; 2178 2179 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 2180 if (&obj->base == NULL) { 2181 ret = -ENOENT; 2182 goto unlock; 2183 } 2184 2185 if (obj->madv != I915_MADV_WILLNEED) { 2186 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2187 ret = -EFAULT; 2188 goto out; 2189 } 2190 2191 ret = i915_gem_object_create_mmap_offset(obj); 2192 if (ret) 2193 goto out; 2194 2195 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2196 DRM_GEM_MAPPING_KEY; 2197 2198 out: 2199 drm_gem_object_unreference(&obj->base); 2200 unlock: 2201 mutex_unlock(&dev->struct_mutex); 2202 return ret; 2203 } 2204 2205 /** 2206 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2207 * @dev: DRM device 2208 * @data: GTT mapping ioctl data 2209 * @file: GEM object info 2210 * 2211 * Simply returns the fake offset to userspace so it can mmap it. 2212 * The mmap call will end up in drm_gem_mmap(), which will set things 2213 * up so we can get faults in the handler above. 2214 * 2215 * The fault handler will take care of binding the object into the GTT 2216 * (since it may have been evicted to make room for something), allocating 2217 * a fence register, and mapping the appropriate aperture address into 2218 * userspace. 
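 *
 * A rough userspace sketch (illustrative only; error handling omitted, and
 * 'fd', 'handle' and 'size' are assumed to be set up by the caller):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 fd, arg.offset);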
2219 */ 2220 int 2221 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2222 struct drm_file *file) 2223 { 2224 struct drm_i915_gem_mmap_gtt *args = data; 2225 2226 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2227 } 2228 2229 /* Immediately discard the backing storage */ 2230 static void 2231 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2232 { 2233 vm_object_t vm_obj; 2234 2235 vm_obj = obj->base.vm_obj; 2236 VM_OBJECT_LOCK(vm_obj); 2237 vm_object_page_remove(vm_obj, 0, 0, false); 2238 VM_OBJECT_UNLOCK(vm_obj); 2239 2240 obj->madv = __I915_MADV_PURGED; 2241 } 2242 2243 /* Try to discard unwanted pages */ 2244 static void 2245 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2246 { 2247 #if 0 2248 struct address_space *mapping; 2249 #endif 2250 2251 switch (obj->madv) { 2252 case I915_MADV_DONTNEED: 2253 i915_gem_object_truncate(obj); 2254 case __I915_MADV_PURGED: 2255 return; 2256 } 2257 2258 #if 0 2259 if (obj->base.filp == NULL) 2260 return; 2261 2262 mapping = file_inode(obj->base.filp)->i_mapping, 2263 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2264 #endif 2265 } 2266 2267 static void 2268 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2269 { 2270 struct sg_page_iter sg_iter; 2271 int ret; 2272 2273 BUG_ON(obj->madv == __I915_MADV_PURGED); 2274 2275 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2276 if (ret) { 2277 /* In the event of a disaster, abandon all caches and 2278 * hope for the best. 2279 */ 2280 WARN_ON(ret != -EIO); 2281 i915_gem_clflush_object(obj, true); 2282 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2283 } 2284 2285 i915_gem_gtt_finish_object(obj); 2286 2287 if (i915_gem_object_needs_bit17_swizzle(obj)) 2288 i915_gem_object_save_bit_17_swizzle(obj); 2289 2290 if (obj->madv == I915_MADV_DONTNEED) 2291 obj->dirty = 0; 2292 2293 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2294 struct vm_page *page = sg_page_iter_page(&sg_iter); 2295 2296 if (obj->dirty) 2297 set_page_dirty(page); 2298 2299 if (obj->madv == I915_MADV_WILLNEED) 2300 mark_page_accessed(page); 2301 2302 vm_page_busy_wait(page, FALSE, "i915gem"); 2303 vm_page_unwire(page, 1); 2304 vm_page_wakeup(page); 2305 } 2306 obj->dirty = 0; 2307 2308 sg_free_table(obj->pages); 2309 kfree(obj->pages); 2310 } 2311 2312 int 2313 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2314 { 2315 const struct drm_i915_gem_object_ops *ops = obj->ops; 2316 2317 if (obj->pages == NULL) 2318 return 0; 2319 2320 if (obj->pages_pin_count) 2321 return -EBUSY; 2322 2323 BUG_ON(i915_gem_obj_bound_any(obj)); 2324 2325 /* ->put_pages might need to allocate memory for the bit17 swizzle 2326 * array, hence protect them from being reaped by removing them from gtt 2327 * lists early. */ 2328 list_del(&obj->global_list); 2329 2330 ops->put_pages(obj); 2331 obj->pages = NULL; 2332 2333 i915_gem_object_invalidate(obj); 2334 2335 return 0; 2336 } 2337 2338 static int 2339 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2340 { 2341 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2342 int page_count, i; 2343 vm_object_t vm_obj; 2344 struct sg_table *st; 2345 struct scatterlist *sg; 2346 struct sg_page_iter sg_iter; 2347 struct vm_page *page; 2348 unsigned long last_pfn = 0; /* suppress gcc warning */ 2349 int ret; 2350 2351 /* Assert that the object is not currently in any GPU domain. 
As it 2352 * wasn't in the GTT, there shouldn't be any way it could have been in 2353 * a GPU cache 2354 */ 2355 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2356 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2357 2358 st = kmalloc(sizeof(*st), M_DRM, M_WAITOK); 2359 if (st == NULL) 2360 return -ENOMEM; 2361 2362 page_count = obj->base.size / PAGE_SIZE; 2363 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2364 kfree(st); 2365 return -ENOMEM; 2366 } 2367 2368 /* Get the list of pages out of our struct file. They'll be pinned 2369 * at this point until we release them. 2370 * 2371 * Fail silently without starting the shrinker 2372 */ 2373 vm_obj = obj->base.vm_obj; 2374 VM_OBJECT_LOCK(vm_obj); 2375 sg = st->sgl; 2376 st->nents = 0; 2377 for (i = 0; i < page_count; i++) { 2378 page = shmem_read_mapping_page(vm_obj, i); 2379 if (IS_ERR(page)) { 2380 i915_gem_shrink(dev_priv, 2381 page_count, 2382 I915_SHRINK_BOUND | 2383 I915_SHRINK_UNBOUND | 2384 I915_SHRINK_PURGEABLE); 2385 page = shmem_read_mapping_page(vm_obj, i); 2386 } 2387 if (IS_ERR(page)) { 2388 /* We've tried hard to allocate the memory by reaping 2389 * our own buffer, now let the real VM do its job and 2390 * go down in flames if truly OOM. 2391 */ 2392 i915_gem_shrink_all(dev_priv); 2393 page = shmem_read_mapping_page(vm_obj, i); 2394 if (IS_ERR(page)) { 2395 ret = PTR_ERR(page); 2396 goto err_pages; 2397 } 2398 } 2399 #ifdef CONFIG_SWIOTLB 2400 if (swiotlb_nr_tbl()) { 2401 st->nents++; 2402 sg_set_page(sg, page, PAGE_SIZE, 0); 2403 sg = sg_next(sg); 2404 continue; 2405 } 2406 #endif 2407 if (!i || page_to_pfn(page) != last_pfn + 1) { 2408 if (i) 2409 sg = sg_next(sg); 2410 st->nents++; 2411 sg_set_page(sg, page, PAGE_SIZE, 0); 2412 } else { 2413 sg->length += PAGE_SIZE; 2414 } 2415 last_pfn = page_to_pfn(page); 2416 2417 /* Check that the i965g/gm workaround works. */ 2418 } 2419 #ifdef CONFIG_SWIOTLB 2420 if (!swiotlb_nr_tbl()) 2421 #endif 2422 sg_mark_end(sg); 2423 obj->pages = st; 2424 VM_OBJECT_UNLOCK(vm_obj); 2425 2426 ret = i915_gem_gtt_prepare_object(obj); 2427 if (ret) 2428 goto err_pages; 2429 2430 if (i915_gem_object_needs_bit17_swizzle(obj)) 2431 i915_gem_object_do_bit_17_swizzle(obj); 2432 2433 if (obj->tiling_mode != I915_TILING_NONE && 2434 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2435 i915_gem_object_pin_pages(obj); 2436 2437 return 0; 2438 2439 err_pages: 2440 sg_mark_end(sg); 2441 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 2442 page = sg_page_iter_page(&sg_iter); 2443 vm_page_busy_wait(page, FALSE, "i915gem"); 2444 vm_page_unwire(page, 0); 2445 vm_page_wakeup(page); 2446 } 2447 VM_OBJECT_UNLOCK(vm_obj); 2448 sg_free_table(st); 2449 kfree(st); 2450 2451 /* shmemfs first checks if there is enough memory to allocate the page 2452 * and reports ENOSPC should there be insufficient, along with the usual 2453 * ENOMEM for a genuine allocation failure. 2454 * 2455 * We use ENOSPC in our driver to mean that we have run out of aperture 2456 * space and so want to translate the error from shmemfs back to our 2457 * usual understanding of ENOMEM. 2458 */ 2459 if (ret == -ENOSPC) 2460 ret = -ENOMEM; 2461 2462 return ret; 2463 } 2464 2465 /* Ensure that the associated pages are gathered from the backing storage 2466 * and pinned into our object. 
i915_gem_object_get_pages() may be called 2467 * multiple times before they are released by a single call to 2468 * i915_gem_object_put_pages() - once the pages are no longer referenced 2469 * either as a result of memory pressure (reaping pages under the shrinker) 2470 * or as the object is itself released. 2471 */ 2472 int 2473 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2474 { 2475 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2476 const struct drm_i915_gem_object_ops *ops = obj->ops; 2477 int ret; 2478 2479 if (obj->pages) 2480 return 0; 2481 2482 if (obj->madv != I915_MADV_WILLNEED) { 2483 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2484 return -EFAULT; 2485 } 2486 2487 BUG_ON(obj->pages_pin_count); 2488 2489 ret = ops->get_pages(obj); 2490 if (ret) 2491 return ret; 2492 2493 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2494 2495 obj->get_page.sg = obj->pages->sgl; 2496 obj->get_page.last = 0; 2497 2498 return 0; 2499 } 2500 2501 void i915_vma_move_to_active(struct i915_vma *vma, 2502 struct intel_engine_cs *ring) 2503 { 2504 struct drm_i915_gem_object *obj = vma->obj; 2505 2506 /* Add a reference if we're newly entering the active list. */ 2507 if (obj->active == 0) 2508 drm_gem_object_reference(&obj->base); 2509 obj->active |= intel_ring_flag(ring); 2510 2511 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2512 i915_gem_request_assign(&obj->last_read_req[ring->id], 2513 intel_ring_get_request(ring)); 2514 2515 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2516 } 2517 2518 static void 2519 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2520 { 2521 RQ_BUG_ON(obj->last_write_req == NULL); 2522 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2523 2524 i915_gem_request_assign(&obj->last_write_req, NULL); 2525 intel_fb_obj_flush(obj, true); 2526 } 2527 2528 static void 2529 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2530 { 2531 struct i915_vma *vma; 2532 2533 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2534 RQ_BUG_ON(!(obj->active & (1 << ring))); 2535 2536 list_del_init(&obj->ring_list[ring]); 2537 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2538 2539 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2540 i915_gem_object_retire__write(obj); 2541 2542 obj->active &= ~(1 << ring); 2543 if (obj->active) 2544 return; 2545 2546 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2547 if (!list_empty(&vma->mm_list)) 2548 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2549 } 2550 2551 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2552 drm_gem_object_unreference(&obj->base); 2553 } 2554 2555 static int 2556 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2557 { 2558 struct drm_i915_private *dev_priv = dev->dev_private; 2559 struct intel_engine_cs *ring; 2560 int ret, i, j; 2561 2562 /* Carefully retire all requests without writing to the rings */ 2563 for_each_ring(ring, dev_priv, i) { 2564 ret = intel_ring_idle(ring); 2565 if (ret) 2566 return ret; 2567 } 2568 i915_gem_retire_requests(dev); 2569 2570 /* Finally reset hw state */ 2571 for_each_ring(ring, dev_priv, i) { 2572 intel_ring_init_seqno(ring, seqno); 2573 2574 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2575 ring->semaphore.sync_seqno[j] = 0; 2576 } 2577 2578 return 0; 2579 } 2580 2581 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2582 { 2583 struct drm_i915_private *dev_priv = dev->dev_private; 2584 
int ret; 2585 2586 if (seqno == 0) 2587 return -EINVAL; 2588 2589 /* HWS page needs to be set less than what we 2590 * will inject to ring 2591 */ 2592 ret = i915_gem_init_seqno(dev, seqno - 1); 2593 if (ret) 2594 return ret; 2595 2596 /* Carefully set the last_seqno value so that wrap 2597 * detection still works 2598 */ 2599 dev_priv->next_seqno = seqno; 2600 dev_priv->last_seqno = seqno - 1; 2601 if (dev_priv->last_seqno == 0) 2602 dev_priv->last_seqno--; 2603 2604 return 0; 2605 } 2606 2607 int 2608 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2609 { 2610 struct drm_i915_private *dev_priv = dev->dev_private; 2611 2612 /* reserve 0 for non-seqno */ 2613 if (dev_priv->next_seqno == 0) { 2614 int ret = i915_gem_init_seqno(dev, 0); 2615 if (ret) 2616 return ret; 2617 2618 dev_priv->next_seqno = 1; 2619 } 2620 2621 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2622 return 0; 2623 } 2624 2625 int __i915_add_request(struct intel_engine_cs *ring, 2626 struct drm_file *file, 2627 struct drm_i915_gem_object *obj) 2628 { 2629 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2630 struct drm_i915_gem_request *request; 2631 struct intel_ringbuffer *ringbuf; 2632 u32 request_start; 2633 int ret; 2634 2635 request = ring->outstanding_lazy_request; 2636 if (WARN_ON(request == NULL)) 2637 return -ENOMEM; 2638 2639 if (i915.enable_execlists) { 2640 ringbuf = request->ctx->engine[ring->id].ringbuf; 2641 } else 2642 ringbuf = ring->buffer; 2643 2644 request_start = intel_ring_get_tail(ringbuf); 2645 /* 2646 * Emit any outstanding flushes - execbuf can fail to emit the flush 2647 * after having emitted the batchbuffer command. Hence we need to fix 2648 * things up similar to emitting the lazy request. The difference here 2649 * is that the flush _must_ happen before the next request, no matter 2650 * what. 2651 */ 2652 if (i915.enable_execlists) { 2653 ret = logical_ring_flush_all_caches(ringbuf, request->ctx); 2654 if (ret) 2655 return ret; 2656 } else { 2657 ret = intel_ring_flush_all_caches(ring); 2658 if (ret) 2659 return ret; 2660 } 2661 2662 /* Record the position of the start of the request so that 2663 * should we detect the updated seqno part-way through the 2664 * GPU processing the request, we never over-estimate the 2665 * position of the head. 2666 */ 2667 request->postfix = intel_ring_get_tail(ringbuf); 2668 2669 if (i915.enable_execlists) { 2670 ret = ring->emit_request(ringbuf, request); 2671 if (ret) 2672 return ret; 2673 } else { 2674 ret = ring->add_request(ring); 2675 if (ret) 2676 return ret; 2677 2678 request->tail = intel_ring_get_tail(ringbuf); 2679 } 2680 2681 request->head = request_start; 2682 2683 /* Whilst this request exists, batch_obj will be on the 2684 * active_list, and so will hold the active reference. Only when this 2685 * request is retired will the the batch_obj be moved onto the 2686 * inactive_list and lose its active reference. Hence we do not need 2687 * to explicitly hold another reference here. 2688 */ 2689 request->batch_obj = obj; 2690 2691 if (!i915.enable_execlists) { 2692 /* Hold a reference to the current context so that we can inspect 2693 * it later in case a hangcheck error event fires. 
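 * The matching i915_gem_context_unreference() happens in
 * i915_gem_request_free() once the request's last reference is dropped.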
2694 */ 2695 request->ctx = ring->last_context; 2696 if (request->ctx) 2697 i915_gem_context_reference(request->ctx); 2698 } 2699 2700 request->emitted_jiffies = jiffies; 2701 ring->last_submitted_seqno = request->seqno; 2702 list_add_tail(&request->list, &ring->request_list); 2703 request->file_priv = NULL; 2704 2705 if (file) { 2706 struct drm_i915_file_private *file_priv = file->driver_priv; 2707 2708 spin_lock(&file_priv->mm.lock); 2709 request->file_priv = file_priv; 2710 list_add_tail(&request->client_list, 2711 &file_priv->mm.request_list); 2712 spin_unlock(&file_priv->mm.lock); 2713 2714 request->pid = curproc->p_pid; 2715 } 2716 2717 trace_i915_gem_request_add(request); 2718 ring->outstanding_lazy_request = NULL; 2719 2720 i915_queue_hangcheck(ring->dev); 2721 2722 queue_delayed_work(dev_priv->wq, 2723 &dev_priv->mm.retire_work, 2724 round_jiffies_up_relative(HZ)); 2725 intel_mark_busy(dev_priv->dev); 2726 2727 return 0; 2728 } 2729 2730 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2731 const struct intel_context *ctx) 2732 { 2733 unsigned long elapsed; 2734 2735 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2736 2737 if (ctx->hang_stats.banned) 2738 return true; 2739 2740 if (ctx->hang_stats.ban_period_seconds && 2741 elapsed <= ctx->hang_stats.ban_period_seconds) { 2742 if (!i915_gem_context_is_default(ctx)) { 2743 DRM_DEBUG("context hanging too fast, banning!\n"); 2744 return true; 2745 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2746 if (i915_stop_ring_allow_warn(dev_priv)) 2747 DRM_ERROR("gpu hanging too fast, banning!\n"); 2748 return true; 2749 } 2750 } 2751 2752 return false; 2753 } 2754 2755 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2756 struct intel_context *ctx, 2757 const bool guilty) 2758 { 2759 struct i915_ctx_hang_stats *hs; 2760 2761 if (WARN_ON(!ctx)) 2762 return; 2763 2764 hs = &ctx->hang_stats; 2765 2766 if (guilty) { 2767 hs->banned = i915_context_is_banned(dev_priv, ctx); 2768 hs->batch_active++; 2769 hs->guilty_ts = get_seconds(); 2770 } else { 2771 hs->batch_pending++; 2772 } 2773 } 2774 2775 void i915_gem_request_free(struct kref *req_ref) 2776 { 2777 struct drm_i915_gem_request *req = container_of(req_ref, 2778 typeof(*req), ref); 2779 struct intel_context *ctx = req->ctx; 2780 2781 if (ctx) { 2782 if (i915.enable_execlists) { 2783 struct intel_engine_cs *ring = req->ring; 2784 2785 if (ctx != ring->default_context) 2786 intel_lr_context_unpin(ring, ctx); 2787 } 2788 2789 i915_gem_context_unreference(ctx); 2790 } 2791 2792 kfree(req); 2793 } 2794 2795 int i915_gem_request_alloc(struct intel_engine_cs *ring, 2796 struct intel_context *ctx) 2797 { 2798 struct drm_i915_private *dev_priv = to_i915(ring->dev); 2799 struct drm_i915_gem_request *req; 2800 int ret; 2801 2802 if (ring->outstanding_lazy_request) 2803 return 0; 2804 2805 req = kzalloc(sizeof(*req), GFP_KERNEL); 2806 if (req == NULL) 2807 return -ENOMEM; 2808 2809 kref_init(&req->ref); 2810 req->i915 = dev_priv; 2811 2812 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 2813 if (ret) 2814 goto err; 2815 2816 req->ring = ring; 2817 2818 if (i915.enable_execlists) 2819 ret = intel_logical_ring_alloc_request_extras(req, ctx); 2820 else 2821 ret = intel_ring_alloc_request_extras(req); 2822 if (ret) 2823 goto err; 2824 2825 ring->outstanding_lazy_request = req; 2826 return 0; 2827 2828 err: 2829 kfree(req); 2830 return ret; 2831 } 2832 2833 struct drm_i915_gem_request * 2834 i915_gem_find_active_request(struct intel_engine_cs *ring) 2835 { 
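	/*
	 * Requests on a ring complete and are retired in submission order,
	 * so the first entry on the request list that has not yet completed
	 * is the request the GPU was processing when we were called.
	 */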
2836 struct drm_i915_gem_request *request; 2837 2838 list_for_each_entry(request, &ring->request_list, list) { 2839 if (i915_gem_request_completed(request, false)) 2840 continue; 2841 2842 return request; 2843 } 2844 2845 return NULL; 2846 } 2847 2848 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2849 struct intel_engine_cs *ring) 2850 { 2851 struct drm_i915_gem_request *request; 2852 bool ring_hung; 2853 2854 request = i915_gem_find_active_request(ring); 2855 2856 if (request == NULL) 2857 return; 2858 2859 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2860 2861 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2862 2863 list_for_each_entry_continue(request, &ring->request_list, list) 2864 i915_set_reset_status(dev_priv, request->ctx, false); 2865 } 2866 2867 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2868 struct intel_engine_cs *ring) 2869 { 2870 while (!list_empty(&ring->active_list)) { 2871 struct drm_i915_gem_object *obj; 2872 2873 obj = list_first_entry(&ring->active_list, 2874 struct drm_i915_gem_object, 2875 ring_list[ring->id]); 2876 2877 i915_gem_object_retire__read(obj, ring->id); 2878 } 2879 2880 /* 2881 * Clear the execlists queue up before freeing the requests, as those 2882 * are the ones that keep the context and ringbuffer backing objects 2883 * pinned in place. 2884 */ 2885 while (!list_empty(&ring->execlist_queue)) { 2886 struct drm_i915_gem_request *submit_req; 2887 2888 submit_req = list_first_entry(&ring->execlist_queue, 2889 struct drm_i915_gem_request, 2890 execlist_link); 2891 list_del(&submit_req->execlist_link); 2892 2893 if (submit_req->ctx != ring->default_context) 2894 intel_lr_context_unpin(ring, submit_req->ctx); 2895 2896 i915_gem_request_unreference(submit_req); 2897 } 2898 2899 /* 2900 * We must free the requests after all the corresponding objects have 2901 * been moved off active lists. Which is the same order as the normal 2902 * retire_requests function does. This is important if object hold 2903 * implicit references on things like e.g. ppgtt address spaces through 2904 * the request. 2905 */ 2906 while (!list_empty(&ring->request_list)) { 2907 struct drm_i915_gem_request *request; 2908 2909 request = list_first_entry(&ring->request_list, 2910 struct drm_i915_gem_request, 2911 list); 2912 2913 i915_gem_request_retire(request); 2914 } 2915 2916 /* This may not have been flushed before the reset, so clean it now */ 2917 i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); 2918 } 2919 2920 void i915_gem_restore_fences(struct drm_device *dev) 2921 { 2922 struct drm_i915_private *dev_priv = dev->dev_private; 2923 int i; 2924 2925 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2926 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2927 2928 /* 2929 * Commit delayed tiling changes if we have an object still 2930 * attached to the fence, otherwise just clear the fence. 2931 */ 2932 if (reg->obj) { 2933 i915_gem_object_update_fence(reg->obj, reg, 2934 reg->obj->tiling_mode); 2935 } else { 2936 i915_gem_write_fence(dev, i, NULL); 2937 } 2938 } 2939 } 2940 2941 void i915_gem_reset(struct drm_device *dev) 2942 { 2943 struct drm_i915_private *dev_priv = dev->dev_private; 2944 struct intel_engine_cs *ring; 2945 int i; 2946 2947 /* 2948 * Before we free the objects from the requests, we need to inspect 2949 * them for finding the guilty party. As the requests only borrow 2950 * their reference to the objects, the inspection must be done first. 
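 * The status pass below (i915_gem_reset_ring_status()) only inspects the
 * request lists and marks the guilty context; the cleanup pass that follows
 * is what actually retires the requests and drops their references.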
2951 */ 2952 for_each_ring(ring, dev_priv, i) 2953 i915_gem_reset_ring_status(dev_priv, ring); 2954 2955 for_each_ring(ring, dev_priv, i) 2956 i915_gem_reset_ring_cleanup(dev_priv, ring); 2957 2958 i915_gem_context_reset(dev); 2959 2960 i915_gem_restore_fences(dev); 2961 2962 WARN_ON(i915_verify_lists(dev)); 2963 } 2964 2965 /** 2966 * This function clears the request list as sequence numbers are passed. 2967 */ 2968 void 2969 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2970 { 2971 WARN_ON(i915_verify_lists(ring->dev)); 2972 2973 /* Retire requests first as we use it above for the early return. 2974 * If we retire requests last, we may use a later seqno and so clear 2975 * the requests lists without clearing the active list, leading to 2976 * confusion. 2977 */ 2978 while (!list_empty(&ring->request_list)) { 2979 struct drm_i915_gem_request *request; 2980 2981 request = list_first_entry(&ring->request_list, 2982 struct drm_i915_gem_request, 2983 list); 2984 2985 if (!i915_gem_request_completed(request, true)) 2986 break; 2987 2988 i915_gem_request_retire(request); 2989 } 2990 2991 /* Move any buffers on the active list that are no longer referenced 2992 * by the ringbuffer to the flushing/inactive lists as appropriate, 2993 * before we free the context associated with the requests. 2994 */ 2995 while (!list_empty(&ring->active_list)) { 2996 struct drm_i915_gem_object *obj; 2997 2998 obj = list_first_entry(&ring->active_list, 2999 struct drm_i915_gem_object, 3000 ring_list[ring->id]); 3001 3002 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3003 break; 3004 3005 i915_gem_object_retire__read(obj, ring->id); 3006 } 3007 3008 if (unlikely(ring->trace_irq_req && 3009 i915_gem_request_completed(ring->trace_irq_req, true))) { 3010 ring->irq_put(ring); 3011 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3012 } 3013 3014 WARN_ON(i915_verify_lists(ring->dev)); 3015 } 3016 3017 bool 3018 i915_gem_retire_requests(struct drm_device *dev) 3019 { 3020 struct drm_i915_private *dev_priv = dev->dev_private; 3021 struct intel_engine_cs *ring; 3022 bool idle = true; 3023 int i; 3024 3025 for_each_ring(ring, dev_priv, i) { 3026 i915_gem_retire_requests_ring(ring); 3027 idle &= list_empty(&ring->request_list); 3028 if (i915.enable_execlists) { 3029 3030 lockmgr(&ring->execlist_lock, LK_EXCLUSIVE); 3031 idle &= list_empty(&ring->execlist_queue); 3032 lockmgr(&ring->execlist_lock, LK_RELEASE); 3033 3034 intel_execlists_retire_requests(ring); 3035 } 3036 } 3037 3038 if (idle) 3039 mod_delayed_work(dev_priv->wq, 3040 &dev_priv->mm.idle_work, 3041 msecs_to_jiffies(100)); 3042 3043 return idle; 3044 } 3045 3046 static void 3047 i915_gem_retire_work_handler(struct work_struct *work) 3048 { 3049 struct drm_i915_private *dev_priv = 3050 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3051 struct drm_device *dev = dev_priv->dev; 3052 bool idle; 3053 3054 /* Come back later if the device is busy... 
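 * Retiring requires struct_mutex; if we cannot take it without blocking we
 * simply treat the device as busy and re-arm the work below.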
*/ 3055 idle = false; 3056 if (mutex_trylock(&dev->struct_mutex)) { 3057 idle = i915_gem_retire_requests(dev); 3058 mutex_unlock(&dev->struct_mutex); 3059 } 3060 if (!idle) 3061 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3062 round_jiffies_up_relative(HZ)); 3063 } 3064 3065 static void 3066 i915_gem_idle_work_handler(struct work_struct *work) 3067 { 3068 struct drm_i915_private *dev_priv = 3069 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3070 struct drm_device *dev = dev_priv->dev; 3071 struct intel_engine_cs *ring; 3072 int i; 3073 3074 for_each_ring(ring, dev_priv, i) 3075 if (!list_empty(&ring->request_list)) 3076 return; 3077 3078 intel_mark_idle(dev); 3079 3080 if (mutex_trylock(&dev->struct_mutex)) { 3081 struct intel_engine_cs *ring; 3082 int i; 3083 3084 for_each_ring(ring, dev_priv, i) 3085 i915_gem_batch_pool_fini(&ring->batch_pool); 3086 3087 mutex_unlock(&dev->struct_mutex); 3088 } 3089 } 3090 3091 /** 3092 * Ensures that an object will eventually get non-busy by flushing any required 3093 * write domains, emitting any outstanding lazy request and retiring any 3094 * completed requests. 3095 */ 3096 static int 3097 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3098 { 3099 int ret, i; 3100 3101 if (!obj->active) 3102 return 0; 3103 3104 for (i = 0; i < I915_NUM_RINGS; i++) { 3105 struct drm_i915_gem_request *req; 3106 3107 req = obj->last_read_req[i]; 3108 if (req == NULL) 3109 continue; 3110 3111 if (list_empty(&req->list)) 3112 goto retire; 3113 3114 ret = i915_gem_check_olr(req); 3115 if (ret) 3116 return ret; 3117 3118 if (i915_gem_request_completed(req, true)) { 3119 __i915_gem_request_retire__upto(req); 3120 retire: 3121 i915_gem_object_retire__read(obj, i); 3122 } 3123 } 3124 3125 return 0; 3126 } 3127 3128 /** 3129 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3130 * @DRM_IOCTL_ARGS: standard ioctl arguments 3131 * 3132 * Returns 0 if successful, else an error is returned with the remaining time in 3133 * the timeout parameter. 3134 * -ETIME: object is still busy after timeout 3135 * -ERESTARTSYS: signal interrupted the wait 3136 * -ENOENT: object doesn't exist 3137 * Also possible, but rare: 3138 * -EAGAIN: GPU wedged 3139 * -ENOMEM: damn 3140 * -ENODEV: Internal IRQ fail 3141 * -E?: The add request failed 3142 * 3143 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3144 * non-zero timeout parameter the wait ioctl will wait for the given number of 3145 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3146 * without holding struct_mutex the object may become re-busied before this 3147 * function completes. A similar but shorter race condition exists in the busy 3148 * ioctl. 3149 */ 3150 int 3151 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3152 { 3153 struct drm_i915_private *dev_priv = dev->dev_private; 3154 struct drm_i915_gem_wait *args = data; 3155 struct drm_i915_gem_object *obj; 3156 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3157 unsigned reset_counter; 3158 int i, n = 0; 3159 int ret; 3160 3161 if (args->flags != 0) 3162 return -EINVAL; 3163 3164 ret = i915_mutex_lock_interruptible(dev); 3165 if (ret) 3166 return ret; 3167 3168 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 3169 if (&obj->base == NULL) { 3170 mutex_unlock(&dev->struct_mutex); 3171 return -ENOENT; 3172 } 3173 3174 /* Need to make sure the object gets inactive eventually. 
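 * i915_gem_object_flush_active() emits any outstanding lazy request and
 * retires whatever has already completed, so the object cannot remain busy
 * merely because its work was never submitted.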
*/ 3175 ret = i915_gem_object_flush_active(obj); 3176 if (ret) 3177 goto out; 3178 3179 if (!obj->active) 3180 goto out; 3181 3182 /* Do this after OLR check to make sure we make forward progress polling 3183 * on this IOCTL with a timeout == 0 (like busy ioctl) 3184 */ 3185 if (args->timeout_ns == 0) { 3186 ret = -ETIME; 3187 goto out; 3188 } 3189 3190 drm_gem_object_unreference(&obj->base); 3191 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3192 3193 for (i = 0; i < I915_NUM_RINGS; i++) { 3194 if (obj->last_read_req[i] == NULL) 3195 continue; 3196 3197 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3198 } 3199 3200 mutex_unlock(&dev->struct_mutex); 3201 3202 for (i = 0; i < n; i++) { 3203 if (ret == 0) 3204 ret = __i915_wait_request(req[i], reset_counter, true, 3205 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3206 file->driver_priv); 3207 i915_gem_request_unreference__unlocked(req[i]); 3208 } 3209 return ret; 3210 3211 out: 3212 drm_gem_object_unreference(&obj->base); 3213 mutex_unlock(&dev->struct_mutex); 3214 return ret; 3215 } 3216 3217 static int 3218 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3219 struct intel_engine_cs *to, 3220 struct drm_i915_gem_request *req) 3221 { 3222 struct intel_engine_cs *from; 3223 int ret; 3224 3225 from = i915_gem_request_get_ring(req); 3226 if (to == from) 3227 return 0; 3228 3229 if (i915_gem_request_completed(req, true)) 3230 return 0; 3231 3232 ret = i915_gem_check_olr(req); 3233 if (ret) 3234 return ret; 3235 3236 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3237 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3238 ret = __i915_wait_request(req, 3239 atomic_read(&i915->gpu_error.reset_counter), 3240 i915->mm.interruptible, 3241 NULL, 3242 &i915->rps.semaphores); 3243 if (ret) 3244 return ret; 3245 3246 i915_gem_object_retire_request(obj, req); 3247 } else { 3248 int idx = intel_ring_sync_index(from, to); 3249 u32 seqno = i915_gem_request_get_seqno(req); 3250 3251 if (seqno <= from->semaphore.sync_seqno[idx]) 3252 return 0; 3253 3254 trace_i915_gem_ring_sync_to(from, to, req); 3255 ret = to->semaphore.sync_to(to, from, seqno); 3256 if (ret) 3257 return ret; 3258 3259 /* We use last_read_req because sync_to() 3260 * might have just caused seqno wrap under 3261 * the radar. 3262 */ 3263 from->semaphore.sync_seqno[idx] = 3264 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3265 } 3266 3267 return 0; 3268 } 3269 3270 /** 3271 * i915_gem_object_sync - sync an object to a ring. 3272 * 3273 * @obj: object which may be in use on another ring. 3274 * @to: ring we wish to use the object on. May be NULL. 3275 * 3276 * This code is meant to abstract object synchronization with the GPU. 3277 * Calling with NULL implies synchronizing the object with the CPU 3278 * rather than a particular GPU ring. Conceptually we serialise writes 3279 * between engines inside the GPU. We only allow one engine to write 3280 * into a buffer at any time, but multiple readers. To ensure each has 3281 * a coherent view of memory, we must: 3282 * 3283 * - If there is an outstanding write request to the object, the new 3284 * request must wait for it to complete (either CPU or in hw, requests 3285 * on the same ring will be naturally ordered). 3286 * 3287 * - If we are a write request (pending_write_domain is set), the new 3288 * request must wait for outstanding read requests to complete. 3289 * 3290 * Returns 0 if successful, else propagates up the lower layer error. 
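 *
 * Per outstanding request this boils down to __i915_gem_object_sync() above:
 * with hardware semaphores enabled the target ring is told to wait on the
 * request's seqno, otherwise we block in __i915_wait_request() before
 * returning.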
3291 */ 3292 int 3293 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3294 struct intel_engine_cs *to) 3295 { 3296 const bool readonly = obj->base.pending_write_domain == 0; 3297 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3298 int ret, i, n; 3299 3300 if (!obj->active) 3301 return 0; 3302 3303 if (to == NULL) 3304 return i915_gem_object_wait_rendering(obj, readonly); 3305 3306 n = 0; 3307 if (readonly) { 3308 if (obj->last_write_req) 3309 req[n++] = obj->last_write_req; 3310 } else { 3311 for (i = 0; i < I915_NUM_RINGS; i++) 3312 if (obj->last_read_req[i]) 3313 req[n++] = obj->last_read_req[i]; 3314 } 3315 for (i = 0; i < n; i++) { 3316 ret = __i915_gem_object_sync(obj, to, req[i]); 3317 if (ret) 3318 return ret; 3319 } 3320 3321 return 0; 3322 } 3323 3324 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3325 { 3326 u32 old_write_domain, old_read_domains; 3327 3328 /* Force a pagefault for domain tracking on next user access */ 3329 i915_gem_release_mmap(obj); 3330 3331 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3332 return; 3333 3334 /* Wait for any direct GTT access to complete */ 3335 mb(); 3336 3337 old_read_domains = obj->base.read_domains; 3338 old_write_domain = obj->base.write_domain; 3339 3340 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3341 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3342 3343 trace_i915_gem_object_change_domain(obj, 3344 old_read_domains, 3345 old_write_domain); 3346 } 3347 3348 int i915_vma_unbind(struct i915_vma *vma) 3349 { 3350 struct drm_i915_gem_object *obj = vma->obj; 3351 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3352 int ret; 3353 3354 if (list_empty(&vma->vma_link)) 3355 return 0; 3356 3357 if (!drm_mm_node_allocated(&vma->node)) { 3358 i915_gem_vma_destroy(vma); 3359 return 0; 3360 } 3361 3362 if (vma->pin_count) 3363 return -EBUSY; 3364 3365 BUG_ON(obj->pages == NULL); 3366 3367 ret = i915_gem_object_wait_rendering(obj, false); 3368 if (ret) 3369 return ret; 3370 /* Continue on if we fail due to EIO, the GPU is hung so we 3371 * should be safe and we need to cleanup or else we might 3372 * cause memory corruption through use-after-free. 3373 */ 3374 3375 if (i915_is_ggtt(vma->vm) && 3376 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3377 i915_gem_object_finish_gtt(obj); 3378 3379 /* release the fence reg _after_ flushing */ 3380 ret = i915_gem_object_put_fence(obj); 3381 if (ret) 3382 return ret; 3383 } 3384 3385 trace_i915_vma_unbind(vma); 3386 3387 vma->vm->unbind_vma(vma); 3388 vma->bound = 0; 3389 3390 list_del_init(&vma->mm_list); 3391 if (i915_is_ggtt(vma->vm)) { 3392 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3393 obj->map_and_fenceable = false; 3394 } else if (vma->ggtt_view.pages) { 3395 sg_free_table(vma->ggtt_view.pages); 3396 kfree(vma->ggtt_view.pages); 3397 } 3398 vma->ggtt_view.pages = NULL; 3399 } 3400 3401 drm_mm_remove_node(&vma->node); 3402 i915_gem_vma_destroy(vma); 3403 3404 /* Since the unbound list is global, only move to that list if 3405 * no more VMAs exist. */ 3406 if (list_empty(&obj->vma_list)) 3407 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3408 3409 /* And finally now the object is completely decoupled from this vma, 3410 * we can drop its hold on the backing storage and allow it to be 3411 * reaped by the shrinker. 
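 * (This drops the pin typically taken in i915_gem_object_bind_to_vm() when
 * the pages were first gathered for this binding.)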
3412 */ 3413 i915_gem_object_unpin_pages(obj); 3414 3415 return 0; 3416 } 3417 3418 int i915_gpu_idle(struct drm_device *dev) 3419 { 3420 struct drm_i915_private *dev_priv = dev->dev_private; 3421 struct intel_engine_cs *ring; 3422 int ret, i; 3423 3424 /* Flush everything onto the inactive list. */ 3425 for_each_ring(ring, dev_priv, i) { 3426 if (!i915.enable_execlists) { 3427 ret = i915_switch_context(ring, ring->default_context); 3428 if (ret) 3429 return ret; 3430 } 3431 3432 ret = intel_ring_idle(ring); 3433 if (ret) 3434 return ret; 3435 } 3436 3437 WARN_ON(i915_verify_lists(dev)); 3438 return 0; 3439 } 3440 3441 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3442 struct drm_i915_gem_object *obj) 3443 { 3444 struct drm_i915_private *dev_priv = dev->dev_private; 3445 int fence_reg; 3446 int fence_pitch_shift; 3447 3448 if (INTEL_INFO(dev)->gen >= 6) { 3449 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3450 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3451 } else { 3452 fence_reg = FENCE_REG_965_0; 3453 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3454 } 3455 3456 fence_reg += reg * 8; 3457 3458 /* To w/a incoherency with non-atomic 64-bit register updates, 3459 * we split the 64-bit update into two 32-bit writes. In order 3460 * for a partial fence not to be evaluated between writes, we 3461 * precede the update with write to turn off the fence register, 3462 * and only enable the fence as the last step. 3463 * 3464 * For extra levels of paranoia, we make sure each step lands 3465 * before applying the next step. 3466 */ 3467 I915_WRITE(fence_reg, 0); 3468 POSTING_READ(fence_reg); 3469 3470 if (obj) { 3471 u32 size = i915_gem_obj_ggtt_size(obj); 3472 uint64_t val; 3473 3474 /* Adjust fence size to match tiled area */ 3475 if (obj->tiling_mode != I915_TILING_NONE) { 3476 uint32_t row_size = obj->stride * 3477 (obj->tiling_mode == I915_TILING_Y ? 32 : 8); 3478 size = (size / row_size) * row_size; 3479 } 3480 3481 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3482 0xfffff000) << 32; 3483 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3484 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3485 if (obj->tiling_mode == I915_TILING_Y) 3486 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3487 val |= I965_FENCE_REG_VALID; 3488 3489 I915_WRITE(fence_reg + 4, val >> 32); 3490 POSTING_READ(fence_reg + 4); 3491 3492 I915_WRITE(fence_reg + 0, val); 3493 POSTING_READ(fence_reg); 3494 } else { 3495 I915_WRITE(fence_reg + 4, 0); 3496 POSTING_READ(fence_reg + 4); 3497 } 3498 } 3499 3500 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3501 struct drm_i915_gem_object *obj) 3502 { 3503 struct drm_i915_private *dev_priv = dev->dev_private; 3504 u32 val; 3505 3506 if (obj) { 3507 u32 size = i915_gem_obj_ggtt_size(obj); 3508 int pitch_val; 3509 int tile_width; 3510 3511 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3512 (size & -size) != size || 3513 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3514 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3515 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3516 3517 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3518 tile_width = 128; 3519 else 3520 tile_width = 512; 3521 3522 /* Note: pitch better be a power of two tile widths */ 3523 pitch_val = obj->stride / tile_width; 3524 pitch_val = ffs(pitch_val) - 1; 3525 3526 val = i915_gem_obj_ggtt_offset(obj); 3527 if (obj->tiling_mode == I915_TILING_Y) 3528 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3529 val |= I915_FENCE_SIZE_BITS(size); 3530 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3531 val |= I830_FENCE_REG_VALID; 3532 } else 3533 val = 0; 3534 3535 if (reg < 8) 3536 reg = FENCE_REG_830_0 + reg * 4; 3537 else 3538 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3539 3540 I915_WRITE(reg, val); 3541 POSTING_READ(reg); 3542 } 3543 3544 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3545 struct drm_i915_gem_object *obj) 3546 { 3547 struct drm_i915_private *dev_priv = dev->dev_private; 3548 uint32_t val; 3549 3550 if (obj) { 3551 u32 size = i915_gem_obj_ggtt_size(obj); 3552 uint32_t pitch_val; 3553 3554 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3555 (size & -size) != size || 3556 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3557 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3558 i915_gem_obj_ggtt_offset(obj), size); 3559 3560 pitch_val = obj->stride / 128; 3561 pitch_val = ffs(pitch_val) - 1; 3562 3563 val = i915_gem_obj_ggtt_offset(obj); 3564 if (obj->tiling_mode == I915_TILING_Y) 3565 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3566 val |= I830_FENCE_SIZE_BITS(size); 3567 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3568 val |= I830_FENCE_REG_VALID; 3569 } else 3570 val = 0; 3571 3572 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3573 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3574 } 3575 3576 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3577 { 3578 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3579 } 3580 3581 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3582 struct drm_i915_gem_object *obj) 3583 { 3584 struct drm_i915_private *dev_priv = dev->dev_private; 3585 3586 /* Ensure that all CPU reads are completed before installing a fence 3587 * and all writes before removing the fence. 3588 */ 3589 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3590 mb(); 3591 3592 WARN(obj && (!obj->stride || !obj->tiling_mode), 3593 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3594 obj->stride, obj->tiling_mode); 3595 3596 if (IS_GEN2(dev)) 3597 i830_write_fence_reg(dev, reg, obj); 3598 else if (IS_GEN3(dev)) 3599 i915_write_fence_reg(dev, reg, obj); 3600 else if (INTEL_INFO(dev)->gen >= 4) 3601 i965_write_fence_reg(dev, reg, obj); 3602 3603 /* And similarly be paranoid that no direct access to this region 3604 * is reordered to before the fence is installed. 3605 */ 3606 if (i915_gem_object_needs_mb(obj)) 3607 mb(); 3608 } 3609 3610 static inline int fence_number(struct drm_i915_private *dev_priv, 3611 struct drm_i915_fence_reg *fence) 3612 { 3613 return fence - dev_priv->fence_regs; 3614 } 3615 3616 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3617 struct drm_i915_fence_reg *fence, 3618 bool enable) 3619 { 3620 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3621 int reg = fence_number(dev_priv, fence); 3622 3623 i915_gem_write_fence(obj->base.dev, reg, enable ? 
obj : NULL); 3624 3625 if (enable) { 3626 obj->fence_reg = reg; 3627 fence->obj = obj; 3628 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3629 } else { 3630 obj->fence_reg = I915_FENCE_REG_NONE; 3631 fence->obj = NULL; 3632 list_del_init(&fence->lru_list); 3633 } 3634 obj->fence_dirty = false; 3635 } 3636 3637 static int 3638 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3639 { 3640 if (obj->last_fenced_req) { 3641 int ret = i915_wait_request(obj->last_fenced_req); 3642 if (ret) 3643 return ret; 3644 3645 i915_gem_request_assign(&obj->last_fenced_req, NULL); 3646 } 3647 3648 return 0; 3649 } 3650 3651 int 3652 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3653 { 3654 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3655 struct drm_i915_fence_reg *fence; 3656 int ret; 3657 3658 ret = i915_gem_object_wait_fence(obj); 3659 if (ret) 3660 return ret; 3661 3662 if (obj->fence_reg == I915_FENCE_REG_NONE) 3663 return 0; 3664 3665 fence = &dev_priv->fence_regs[obj->fence_reg]; 3666 3667 if (WARN_ON(fence->pin_count)) 3668 return -EBUSY; 3669 3670 i915_gem_object_fence_lost(obj); 3671 i915_gem_object_update_fence(obj, fence, false); 3672 3673 return 0; 3674 } 3675 3676 static struct drm_i915_fence_reg * 3677 i915_find_fence_reg(struct drm_device *dev) 3678 { 3679 struct drm_i915_private *dev_priv = dev->dev_private; 3680 struct drm_i915_fence_reg *reg, *avail; 3681 int i; 3682 3683 /* First try to find a free reg */ 3684 avail = NULL; 3685 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3686 reg = &dev_priv->fence_regs[i]; 3687 if (!reg->obj) 3688 return reg; 3689 3690 if (!reg->pin_count) 3691 avail = reg; 3692 } 3693 3694 if (avail == NULL) 3695 goto deadlock; 3696 3697 /* None available, try to steal one or wait for a user to finish */ 3698 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3699 if (reg->pin_count) 3700 continue; 3701 3702 return reg; 3703 } 3704 3705 deadlock: 3706 /* Wait for completion of pending flips which consume fences */ 3707 if (intel_has_pending_fb_unpin(dev)) 3708 return ERR_PTR(-EAGAIN); 3709 3710 return ERR_PTR(-EDEADLK); 3711 } 3712 3713 /** 3714 * i915_gem_object_get_fence - set up fencing for an object 3715 * @obj: object to map through a fence reg 3716 * 3717 * When mapping objects through the GTT, userspace wants to be able to write 3718 * to them without having to worry about swizzling if the object is tiled. 3719 * This function walks the fence regs looking for a free one for @obj, 3720 * stealing one if it can't find any. 3721 * 3722 * It then sets up the reg based on the object's properties: address, pitch 3723 * and tiling format. 3724 * 3725 * For an untiled surface, this removes any existing fence. 3726 */ 3727 int 3728 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3729 { 3730 struct drm_device *dev = obj->base.dev; 3731 struct drm_i915_private *dev_priv = dev->dev_private; 3732 bool enable = obj->tiling_mode != I915_TILING_NONE; 3733 struct drm_i915_fence_reg *reg; 3734 int ret; 3735 3736 /* Have we updated the tiling parameters upon the object and so 3737 * will need to serialise the write to the associated fence register? 3738 */ 3739 if (obj->fence_dirty) { 3740 ret = i915_gem_object_wait_fence(obj); 3741 if (ret) 3742 return ret; 3743 } 3744 3745 /* Just update our place in the LRU if our fence is getting reused. 
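 * Otherwise a tiled object picks a register via i915_find_fence_reg(),
 * waiting for (and stealing from) an old owner if necessary, while an
 * untiled object with no existing fence needs nothing further.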
*/ 3746 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3747 reg = &dev_priv->fence_regs[obj->fence_reg]; 3748 if (!obj->fence_dirty) { 3749 list_move_tail(®->lru_list, 3750 &dev_priv->mm.fence_list); 3751 return 0; 3752 } 3753 } else if (enable) { 3754 if (WARN_ON(!obj->map_and_fenceable)) 3755 return -EINVAL; 3756 3757 reg = i915_find_fence_reg(dev); 3758 if (IS_ERR(reg)) 3759 return PTR_ERR(reg); 3760 3761 if (reg->obj) { 3762 struct drm_i915_gem_object *old = reg->obj; 3763 3764 ret = i915_gem_object_wait_fence(old); 3765 if (ret) 3766 return ret; 3767 3768 i915_gem_object_fence_lost(old); 3769 } 3770 } else 3771 return 0; 3772 3773 i915_gem_object_update_fence(obj, reg, enable); 3774 3775 return 0; 3776 } 3777 3778 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3779 unsigned long cache_level) 3780 { 3781 struct drm_mm_node *gtt_space = &vma->node; 3782 struct drm_mm_node *other; 3783 3784 /* 3785 * On some machines we have to be careful when putting differing types 3786 * of snoopable memory together to avoid the prefetcher crossing memory 3787 * domains and dying. During vm initialisation, we decide whether or not 3788 * these constraints apply and set the drm_mm.color_adjust 3789 * appropriately. 3790 */ 3791 if (vma->vm->mm.color_adjust == NULL) 3792 return true; 3793 3794 if (!drm_mm_node_allocated(gtt_space)) 3795 return true; 3796 3797 if (list_empty(>t_space->node_list)) 3798 return true; 3799 3800 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3801 if (other->allocated && !other->hole_follows && other->color != cache_level) 3802 return false; 3803 3804 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3805 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3806 return false; 3807 3808 return true; 3809 } 3810 3811 /** 3812 * Finds free space in the GTT aperture and binds the object or a view of it 3813 * there. 3814 */ 3815 static struct i915_vma * 3816 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3817 struct i915_address_space *vm, 3818 const struct i915_ggtt_view *ggtt_view, 3819 unsigned alignment, 3820 uint64_t flags) 3821 { 3822 struct drm_device *dev = obj->base.dev; 3823 struct drm_i915_private *dev_priv = dev->dev_private; 3824 u32 size, fence_size, fence_alignment, unfenced_alignment; 3825 unsigned long start = 3826 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3827 unsigned long end = 3828 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3829 struct i915_vma *vma; 3830 int ret; 3831 3832 if (i915_is_ggtt(vm)) { 3833 u32 view_size; 3834 3835 if (WARN_ON(!ggtt_view)) 3836 return ERR_PTR(-EINVAL); 3837 3838 view_size = i915_ggtt_view_size(obj, ggtt_view); 3839 3840 fence_size = i915_gem_get_gtt_size(dev, 3841 view_size, 3842 obj->tiling_mode); 3843 fence_alignment = i915_gem_get_gtt_alignment(dev, 3844 view_size, 3845 obj->tiling_mode, 3846 true); 3847 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3848 view_size, 3849 obj->tiling_mode, 3850 false); 3851 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3852 } else { 3853 fence_size = i915_gem_get_gtt_size(dev, 3854 obj->base.size, 3855 obj->tiling_mode); 3856 fence_alignment = i915_gem_get_gtt_alignment(dev, 3857 obj->base.size, 3858 obj->tiling_mode, 3859 true); 3860 unfenced_alignment = 3861 i915_gem_get_gtt_alignment(dev, 3862 obj->base.size, 3863 obj->tiling_mode, 3864 false); 3865 size = flags & PIN_MAPPABLE ? 
fence_size : obj->base.size; 3866 } 3867 3868 if (alignment == 0) 3869 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3870 unfenced_alignment; 3871 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3872 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 3873 ggtt_view ? ggtt_view->type : 0, 3874 alignment); 3875 return ERR_PTR(-EINVAL); 3876 } 3877 3878 /* If binding the object/GGTT view requires more space than the entire 3879 * aperture has, reject it early before evicting everything in a vain 3880 * attempt to find space. 3881 */ 3882 if (size > end) { 3883 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", 3884 ggtt_view ? ggtt_view->type : 0, 3885 size, 3886 flags & PIN_MAPPABLE ? "mappable" : "total", 3887 end); 3888 return ERR_PTR(-E2BIG); 3889 } 3890 3891 ret = i915_gem_object_get_pages(obj); 3892 if (ret) 3893 return ERR_PTR(ret); 3894 3895 i915_gem_object_pin_pages(obj); 3896 3897 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 3898 i915_gem_obj_lookup_or_create_vma(obj, vm); 3899 3900 if (IS_ERR(vma)) 3901 goto err_unpin; 3902 3903 search_free: 3904 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3905 size, alignment, 3906 obj->cache_level, 3907 start, end, 3908 DRM_MM_SEARCH_DEFAULT, 3909 DRM_MM_CREATE_DEFAULT); 3910 if (ret) { 3911 ret = i915_gem_evict_something(dev, vm, size, alignment, 3912 obj->cache_level, 3913 start, end, 3914 flags); 3915 if (ret == 0) 3916 goto search_free; 3917 3918 goto err_free_vma; 3919 } 3920 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3921 ret = -EINVAL; 3922 goto err_remove_node; 3923 } 3924 3925 trace_i915_vma_bind(vma, flags); 3926 ret = i915_vma_bind(vma, obj->cache_level, flags); 3927 if (ret) 3928 goto err_remove_node; 3929 3930 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3931 list_add_tail(&vma->mm_list, &vm->inactive_list); 3932 3933 return vma; 3934 3935 err_remove_node: 3936 drm_mm_remove_node(&vma->node); 3937 err_free_vma: 3938 i915_gem_vma_destroy(vma); 3939 vma = ERR_PTR(ret); 3940 err_unpin: 3941 i915_gem_object_unpin_pages(obj); 3942 return vma; 3943 } 3944 3945 bool 3946 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3947 bool force) 3948 { 3949 /* If we don't have a page list set up, then we're not pinned 3950 * to GPU, and we can ignore the cache flush because it'll happen 3951 * again at bind time. 3952 */ 3953 if (obj->pages == NULL) 3954 return false; 3955 3956 /* 3957 * Stolen memory is always coherent with the GPU as it is explicitly 3958 * marked as wc by the system, or the system is cache-coherent. 3959 */ 3960 if (obj->stolen || obj->phys_handle) 3961 return false; 3962 3963 /* If the GPU is snooping the contents of the CPU cache, 3964 * we do not need to manually clear the CPU cache lines. However, 3965 * the caches are only snooped when the render cache is 3966 * flushed/invalidated. As we always have to emit invalidations 3967 * and flushes when moving into and out of the RENDER domain, correct 3968 * snooping behaviour occurs naturally as the result of our domain 3969 * tracking. 3970 */ 3971 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3972 obj->cache_dirty = true; 3973 return false; 3974 } 3975 3976 trace_i915_gem_object_clflush(obj); 3977 drm_clflush_sg(obj->pages); 3978 obj->cache_dirty = false; 3979 3980 return true; 3981 } 3982 3983 /** Flushes the GTT write domain for the object if it's dirty. 
*/ 3984 static void 3985 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3986 { 3987 uint32_t old_write_domain; 3988 3989 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3990 return; 3991 3992 /* No actual flushing is required for the GTT write domain. Writes 3993 * to it immediately go to main memory as far as we know, so there's 3994 * no chipset flush. It also doesn't land in render cache. 3995 * 3996 * However, we do have to enforce the order so that all writes through 3997 * the GTT land before any writes to the device, such as updates to 3998 * the GATT itself. 3999 */ 4000 wmb(); 4001 4002 old_write_domain = obj->base.write_domain; 4003 obj->base.write_domain = 0; 4004 4005 intel_fb_obj_flush(obj, false); 4006 4007 trace_i915_gem_object_change_domain(obj, 4008 obj->base.read_domains, 4009 old_write_domain); 4010 } 4011 4012 /** Flushes the CPU write domain for the object if it's dirty. */ 4013 static void 4014 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 4015 { 4016 uint32_t old_write_domain; 4017 4018 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 4019 return; 4020 4021 if (i915_gem_clflush_object(obj, obj->pin_display)) 4022 i915_gem_chipset_flush(obj->base.dev); 4023 4024 old_write_domain = obj->base.write_domain; 4025 obj->base.write_domain = 0; 4026 4027 intel_fb_obj_flush(obj, false); 4028 4029 trace_i915_gem_object_change_domain(obj, 4030 obj->base.read_domains, 4031 old_write_domain); 4032 } 4033 4034 /** 4035 * Moves a single object to the GTT read, and possibly write domain. 4036 * 4037 * This function returns when the move is complete, including waiting on 4038 * flushes to occur. 4039 */ 4040 int 4041 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4042 { 4043 uint32_t old_write_domain, old_read_domains; 4044 struct i915_vma *vma; 4045 int ret; 4046 4047 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 4048 return 0; 4049 4050 ret = i915_gem_object_wait_rendering(obj, !write); 4051 if (ret) 4052 return ret; 4053 4054 /* Flush and acquire obj->pages so that we are coherent through 4055 * direct access in memory with previous cached writes through 4056 * shmemfs and that our cache domain tracking remains valid. 4057 * For example, if the obj->filp was moved to swap without us 4058 * being notified and releasing the pages, we would mistakenly 4059 * continue to assume that the obj remained out of the CPU cached 4060 * domain. 4061 */ 4062 ret = i915_gem_object_get_pages(obj); 4063 if (ret) 4064 return ret; 4065 4066 i915_gem_object_flush_cpu_write_domain(obj); 4067 4068 /* Serialise direct access to this object with the barriers for 4069 * coherent writes from the GPU, by effectively invalidating the 4070 * GTT domain upon first access. 4071 */ 4072 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 4073 mb(); 4074 4075 old_write_domain = obj->base.write_domain; 4076 old_read_domains = obj->base.read_domains; 4077 4078 /* It should now be out of any other write domains, and we can update 4079 * the domain values for our changes. 
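 * A write access collapses the read domains to GTT only, since any cached
 * CPU view may become stale, whereas a read access merely adds GTT to the
 * set of read domains.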
4080 */ 4081 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4082 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4083 if (write) { 4084 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 4085 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 4086 obj->dirty = 1; 4087 } 4088 4089 if (write) 4090 intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); 4091 4092 trace_i915_gem_object_change_domain(obj, 4093 old_read_domains, 4094 old_write_domain); 4095 4096 /* And bump the LRU for this access */ 4097 vma = i915_gem_obj_to_ggtt(obj); 4098 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4099 list_move_tail(&vma->mm_list, 4100 &to_i915(obj->base.dev)->gtt.base.inactive_list); 4101 4102 return 0; 4103 } 4104 4105 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4106 enum i915_cache_level cache_level) 4107 { 4108 struct drm_device *dev = obj->base.dev; 4109 struct i915_vma *vma, *next; 4110 int ret; 4111 4112 if (obj->cache_level == cache_level) 4113 return 0; 4114 4115 if (i915_gem_obj_is_pinned(obj)) { 4116 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4117 return -EBUSY; 4118 } 4119 4120 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4121 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4122 ret = i915_vma_unbind(vma); 4123 if (ret) 4124 return ret; 4125 } 4126 } 4127 4128 if (i915_gem_obj_bound_any(obj)) { 4129 ret = i915_gem_object_wait_rendering(obj, false); 4130 if (ret) 4131 return ret; 4132 4133 i915_gem_object_finish_gtt(obj); 4134 4135 /* Before SandyBridge, you could not use tiling or fence 4136 * registers with snooped memory, so relinquish any fences 4137 * currently pointing to our region in the aperture. 4138 */ 4139 if (INTEL_INFO(dev)->gen < 6) { 4140 ret = i915_gem_object_put_fence(obj); 4141 if (ret) 4142 return ret; 4143 } 4144 4145 list_for_each_entry(vma, &obj->vma_list, vma_link) 4146 if (drm_mm_node_allocated(&vma->node)) { 4147 ret = i915_vma_bind(vma, cache_level, 4148 PIN_UPDATE); 4149 if (ret) 4150 return ret; 4151 } 4152 } 4153 4154 list_for_each_entry(vma, &obj->vma_list, vma_link) 4155 vma->node.color = cache_level; 4156 obj->cache_level = cache_level; 4157 4158 if (obj->cache_dirty && 4159 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4160 cpu_write_needs_clflush(obj)) { 4161 if (i915_gem_clflush_object(obj, true)) 4162 i915_gem_chipset_flush(obj->base.dev); 4163 } 4164 4165 return 0; 4166 } 4167 4168 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4169 struct drm_file *file) 4170 { 4171 struct drm_i915_gem_caching *args = data; 4172 struct drm_i915_gem_object *obj; 4173 4174 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4175 if (&obj->base == NULL) 4176 return -ENOENT; 4177 4178 switch (obj->cache_level) { 4179 case I915_CACHE_LLC: 4180 case I915_CACHE_L3_LLC: 4181 args->caching = I915_CACHING_CACHED; 4182 break; 4183 4184 case I915_CACHE_WT: 4185 args->caching = I915_CACHING_DISPLAY; 4186 break; 4187 4188 default: 4189 args->caching = I915_CACHING_NONE; 4190 break; 4191 } 4192 4193 drm_gem_object_unreference_unlocked(&obj->base); 4194 return 0; 4195 } 4196 4197 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4198 struct drm_file *file) 4199 { 4200 struct drm_i915_gem_caching *args = data; 4201 struct drm_i915_gem_object *obj; 4202 enum i915_cache_level level; 4203 int ret; 4204 4205 switch (args->caching) { 4206 case I915_CACHING_NONE: 4207 level = I915_CACHE_NONE; 4208 break; 4209 case I915_CACHING_CACHED: 4210 level = 
I915_CACHE_LLC; 4211 break; 4212 case I915_CACHING_DISPLAY: 4213 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4214 break; 4215 default: 4216 return -EINVAL; 4217 } 4218 4219 ret = i915_mutex_lock_interruptible(dev); 4220 if (ret) 4221 return ret; 4222 4223 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4224 if (&obj->base == NULL) { 4225 ret = -ENOENT; 4226 goto unlock; 4227 } 4228 4229 ret = i915_gem_object_set_cache_level(obj, level); 4230 4231 drm_gem_object_unreference(&obj->base); 4232 unlock: 4233 mutex_unlock(&dev->struct_mutex); 4234 return ret; 4235 } 4236 4237 /* 4238 * Prepare buffer for display plane (scanout, cursors, etc). 4239 * Can be called from an uninterruptible phase (modesetting) and allows 4240 * any flushes to be pipelined (for pageflips). 4241 */ 4242 int 4243 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4244 u32 alignment, 4245 struct intel_engine_cs *pipelined, 4246 const struct i915_ggtt_view *view) 4247 { 4248 u32 old_read_domains, old_write_domain; 4249 int ret; 4250 4251 ret = i915_gem_object_sync(obj, pipelined); 4252 if (ret) 4253 return ret; 4254 4255 /* Mark the pin_display early so that we account for the 4256 * display coherency whilst setting up the cache domains. 4257 */ 4258 obj->pin_display++; 4259 4260 /* The display engine is not coherent with the LLC cache on gen6. As 4261 * a result, we make sure that the pinning that is about to occur is 4262 * done with uncached PTEs. This is lowest common denominator for all 4263 * chipsets. 4264 * 4265 * However for gen6+, we could do better by using the GFDT bit instead 4266 * of uncaching, which would allow us to flush all the LLC-cached data 4267 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4268 */ 4269 ret = i915_gem_object_set_cache_level(obj, 4270 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4271 if (ret) 4272 goto err_unpin_display; 4273 4274 /* As the user may map the buffer once pinned in the display plane 4275 * (e.g. libkms for the bootup splash), we have to ensure that we 4276 * always use map_and_fenceable for all scanout buffers. 4277 */ 4278 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4279 view->type == I915_GGTT_VIEW_NORMAL ? 4280 PIN_MAPPABLE : 0); 4281 if (ret) 4282 goto err_unpin_display; 4283 4284 i915_gem_object_flush_cpu_write_domain(obj); 4285 4286 old_write_domain = obj->base.write_domain; 4287 old_read_domains = obj->base.read_domains; 4288 4289 /* It should now be out of any other write domains, and we can update 4290 * the domain values for our changes. 4291 */ 4292 obj->base.write_domain = 0; 4293 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4294 4295 trace_i915_gem_object_change_domain(obj, 4296 old_read_domains, 4297 old_write_domain); 4298 4299 return 0; 4300 4301 err_unpin_display: 4302 obj->pin_display--; 4303 return ret; 4304 } 4305 4306 void 4307 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4308 const struct i915_ggtt_view *view) 4309 { 4310 if (WARN_ON(obj->pin_display == 0)) 4311 return; 4312 4313 i915_gem_object_ggtt_unpin_view(obj, view); 4314 4315 obj->pin_display--; 4316 } 4317 4318 /** 4319 * Moves a single object to the CPU read, and possibly write domain. 4320 * 4321 * This function returns when the move is complete, including waiting on 4322 * flushes to occur. 
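 *
 * In contrast to the GTT case above, moving into the CPU domain may have
 * to clflush the object if its CPU read domain is still invalid (see the
 * body below).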
4323 */ 4324 int 4325 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4326 { 4327 uint32_t old_write_domain, old_read_domains; 4328 int ret; 4329 4330 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4331 return 0; 4332 4333 ret = i915_gem_object_wait_rendering(obj, !write); 4334 if (ret) 4335 return ret; 4336 4337 i915_gem_object_flush_gtt_write_domain(obj); 4338 4339 old_write_domain = obj->base.write_domain; 4340 old_read_domains = obj->base.read_domains; 4341 4342 /* Flush the CPU cache if it's still invalid. */ 4343 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4344 i915_gem_clflush_object(obj, false); 4345 4346 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4347 } 4348 4349 /* It should now be out of any other write domains, and we can update 4350 * the domain values for our changes. 4351 */ 4352 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4353 4354 /* If we're writing through the CPU, then the GPU read domains will 4355 * need to be invalidated at next use. 4356 */ 4357 if (write) { 4358 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4359 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4360 } 4361 4362 if (write) 4363 intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); 4364 4365 trace_i915_gem_object_change_domain(obj, 4366 old_read_domains, 4367 old_write_domain); 4368 4369 return 0; 4370 } 4371 4372 /* Throttle our rendering by waiting until the ring has completed our requests 4373 * emitted over 20 msec ago. 4374 * 4375 * Note that if we were to use the current jiffies each time around the loop, 4376 * we wouldn't escape the function with any frames outstanding if the time to 4377 * render a frame was over 20ms. 4378 * 4379 * This should get us reasonable parallelism between CPU and GPU but also 4380 * relatively low latency when blocking on a particular request to finish. 
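 *
 * The 20 ms window comes from DRM_I915_THROTTLE_JIFFIES used below; the
 * most recent request emitted before that window (if any) is the one we
 * wait on.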
4381 */ 4382 static int 4383 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4384 { 4385 struct drm_i915_private *dev_priv = dev->dev_private; 4386 struct drm_i915_file_private *file_priv = file->driver_priv; 4387 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4388 struct drm_i915_gem_request *request, *target = NULL; 4389 unsigned reset_counter; 4390 int ret; 4391 4392 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4393 if (ret) 4394 return ret; 4395 4396 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4397 if (ret) 4398 return ret; 4399 4400 spin_lock(&file_priv->mm.lock); 4401 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4402 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4403 break; 4404 4405 target = request; 4406 } 4407 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4408 if (target) 4409 i915_gem_request_reference(target); 4410 spin_unlock(&file_priv->mm.lock); 4411 4412 if (target == NULL) 4413 return 0; 4414 4415 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4416 if (ret == 0) 4417 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4418 4419 i915_gem_request_unreference__unlocked(target); 4420 4421 return ret; 4422 } 4423 4424 static bool 4425 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4426 { 4427 struct drm_i915_gem_object *obj = vma->obj; 4428 4429 if (alignment && 4430 vma->node.start & (alignment - 1)) 4431 return true; 4432 4433 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4434 return true; 4435 4436 if (flags & PIN_OFFSET_BIAS && 4437 vma->node.start < (flags & PIN_OFFSET_MASK)) 4438 return true; 4439 4440 return false; 4441 } 4442 4443 static int 4444 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4445 struct i915_address_space *vm, 4446 const struct i915_ggtt_view *ggtt_view, 4447 uint32_t alignment, 4448 uint64_t flags) 4449 { 4450 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4451 struct i915_vma *vma; 4452 unsigned bound; 4453 int ret; 4454 4455 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4456 return -ENODEV; 4457 4458 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4459 return -EINVAL; 4460 4461 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4462 return -EINVAL; 4463 4464 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4465 return -EINVAL; 4466 4467 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4468 i915_gem_obj_to_vma(obj, vm); 4469 4470 if (IS_ERR(vma)) 4471 return PTR_ERR(vma); 4472 4473 if (vma) { 4474 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4475 return -EBUSY; 4476 4477 if (i915_vma_misplaced(vma, alignment, flags)) { 4478 unsigned long offset; 4479 offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) : 4480 i915_gem_obj_offset(obj, vm); 4481 WARN(vma->pin_count, 4482 "bo is already pinned in %s with incorrect alignment:" 4483 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4484 " obj->map_and_fenceable=%d\n", 4485 ggtt_view ? "ggtt" : "ppgtt", 4486 offset, 4487 alignment, 4488 !!(flags & PIN_MAPPABLE), 4489 obj->map_and_fenceable); 4490 ret = i915_vma_unbind(vma); 4491 if (ret) 4492 return ret; 4493 4494 vma = NULL; 4495 } 4496 } 4497 4498 bound = vma ? 
vma->bound : 0; 4499 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4500 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4501 flags); 4502 if (IS_ERR(vma)) 4503 return PTR_ERR(vma); 4504 } else { 4505 ret = i915_vma_bind(vma, obj->cache_level, flags); 4506 if (ret) 4507 return ret; 4508 } 4509 4510 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4511 (bound ^ vma->bound) & GLOBAL_BIND) { 4512 bool mappable, fenceable; 4513 u32 fence_size, fence_alignment; 4514 4515 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4516 obj->base.size, 4517 obj->tiling_mode); 4518 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4519 obj->base.size, 4520 obj->tiling_mode, 4521 true); 4522 4523 fenceable = (vma->node.size == fence_size && 4524 (vma->node.start & (fence_alignment - 1)) == 0); 4525 4526 mappable = (vma->node.start + fence_size <= 4527 dev_priv->gtt.mappable_end); 4528 4529 obj->map_and_fenceable = mappable && fenceable; 4530 4531 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4532 } 4533 4534 vma->pin_count++; 4535 return 0; 4536 } 4537 4538 int 4539 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4540 struct i915_address_space *vm, 4541 uint32_t alignment, 4542 uint64_t flags) 4543 { 4544 return i915_gem_object_do_pin(obj, vm, 4545 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4546 alignment, flags); 4547 } 4548 4549 int 4550 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4551 const struct i915_ggtt_view *view, 4552 uint32_t alignment, 4553 uint64_t flags) 4554 { 4555 if (WARN_ONCE(!view, "no view specified")) 4556 return -EINVAL; 4557 4558 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4559 alignment, flags | PIN_GLOBAL); 4560 } 4561 4562 void 4563 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4564 const struct i915_ggtt_view *view) 4565 { 4566 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4567 4568 BUG_ON(!vma); 4569 WARN_ON(vma->pin_count == 0); 4570 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4571 4572 --vma->pin_count; 4573 } 4574 4575 bool 4576 i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) 4577 { 4578 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4579 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4580 struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); 4581 4582 WARN_ON(!ggtt_vma || 4583 dev_priv->fence_regs[obj->fence_reg].pin_count > 4584 ggtt_vma->pin_count); 4585 dev_priv->fence_regs[obj->fence_reg].pin_count++; 4586 return true; 4587 } else 4588 return false; 4589 } 4590 4591 void 4592 i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) 4593 { 4594 if (obj->fence_reg != I915_FENCE_REG_NONE) { 4595 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4596 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); 4597 dev_priv->fence_regs[obj->fence_reg].pin_count--; 4598 } 4599 } 4600 4601 int 4602 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4603 struct drm_file *file) 4604 { 4605 struct drm_i915_gem_busy *args = data; 4606 struct drm_i915_gem_object *obj; 4607 int ret; 4608 4609 ret = i915_mutex_lock_interruptible(dev); 4610 if (ret) 4611 return ret; 4612 4613 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4614 if (&obj->base == NULL) { 4615 ret = -ENOENT; 4616 goto unlock; 4617 } 4618 4619 /* Count all active objects as busy, even if they are currently not used 4620 * by the gpu. 
Users of this interface expect objects to eventually 4621 * become non-busy without any further actions, therefore emit any 4622 * necessary flushes here. 4623 */ 4624 ret = i915_gem_object_flush_active(obj); 4625 if (ret) 4626 goto unref; 4627 4628 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4629 args->busy = obj->active << 16; 4630 if (obj->last_write_req) 4631 args->busy |= obj->last_write_req->ring->id; 4632 4633 unref: 4634 drm_gem_object_unreference(&obj->base); 4635 unlock: 4636 mutex_unlock(&dev->struct_mutex); 4637 return ret; 4638 } 4639 4640 int 4641 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4642 struct drm_file *file_priv) 4643 { 4644 return i915_gem_ring_throttle(dev, file_priv); 4645 } 4646 4647 int 4648 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4649 struct drm_file *file_priv) 4650 { 4651 struct drm_i915_private *dev_priv = dev->dev_private; 4652 struct drm_i915_gem_madvise *args = data; 4653 struct drm_i915_gem_object *obj; 4654 int ret; 4655 4656 switch (args->madv) { 4657 case I915_MADV_DONTNEED: 4658 case I915_MADV_WILLNEED: 4659 break; 4660 default: 4661 return -EINVAL; 4662 } 4663 4664 ret = i915_mutex_lock_interruptible(dev); 4665 if (ret) 4666 return ret; 4667 4668 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4669 if (&obj->base == NULL) { 4670 ret = -ENOENT; 4671 goto unlock; 4672 } 4673 4674 if (i915_gem_obj_is_pinned(obj)) { 4675 ret = -EINVAL; 4676 goto out; 4677 } 4678 4679 if (obj->pages && 4680 obj->tiling_mode != I915_TILING_NONE && 4681 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4682 if (obj->madv == I915_MADV_WILLNEED) 4683 i915_gem_object_unpin_pages(obj); 4684 if (args->madv == I915_MADV_WILLNEED) 4685 i915_gem_object_pin_pages(obj); 4686 } 4687 4688 if (obj->madv != __I915_MADV_PURGED) 4689 obj->madv = args->madv; 4690 4691 /* if the object is no longer attached, discard its backing storage */ 4692 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4693 i915_gem_object_truncate(obj); 4694 4695 args->retained = obj->madv != __I915_MADV_PURGED; 4696 4697 out: 4698 drm_gem_object_unreference(&obj->base); 4699 unlock: 4700 mutex_unlock(&dev->struct_mutex); 4701 return ret; 4702 } 4703 4704 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4705 const struct drm_i915_gem_object_ops *ops) 4706 { 4707 int i; 4708 4709 INIT_LIST_HEAD(&obj->global_list); 4710 for (i = 0; i < I915_NUM_RINGS; i++) 4711 INIT_LIST_HEAD(&obj->ring_list[i]); 4712 INIT_LIST_HEAD(&obj->obj_exec_link); 4713 INIT_LIST_HEAD(&obj->vma_list); 4714 INIT_LIST_HEAD(&obj->batch_pool_link); 4715 4716 obj->ops = ops; 4717 4718 obj->fence_reg = I915_FENCE_REG_NONE; 4719 obj->madv = I915_MADV_WILLNEED; 4720 4721 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4722 } 4723 4724 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4725 .get_pages = i915_gem_object_get_pages_gtt, 4726 .put_pages = i915_gem_object_put_pages_gtt, 4727 }; 4728 4729 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4730 size_t size) 4731 { 4732 struct drm_i915_gem_object *obj; 4733 #if 0 4734 struct address_space *mapping; 4735 gfp_t mask; 4736 #endif 4737 4738 obj = i915_gem_object_alloc(dev); 4739 if (obj == NULL) 4740 return NULL; 4741 4742 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4743 i915_gem_object_free(obj); 4744 return NULL; 4745 } 4746 4747 #if 0 4748 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4749 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4750 /* 965gm cannot relocate 
objects above 4GiB. */ 4751 mask &= ~__GFP_HIGHMEM; 4752 mask |= __GFP_DMA32; 4753 } 4754 4755 mapping = file_inode(obj->base.filp)->i_mapping; 4756 mapping_set_gfp_mask(mapping, mask); 4757 #endif 4758 4759 i915_gem_object_init(obj, &i915_gem_object_ops); 4760 4761 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4762 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4763 4764 if (HAS_LLC(dev)) { 4765 /* On some devices, we can have the GPU use the LLC (the CPU 4766 * cache) for about a 10% performance improvement 4767 * compared to uncached. Graphics requests other than 4768 * display scanout are coherent with the CPU in 4769 * accessing this cache. This means in this mode we 4770 * don't need to clflush on the CPU side, and on the 4771 * GPU side we only need to flush internal caches to 4772 * get data visible to the CPU. 4773 * 4774 * However, we maintain the display planes as UC, and so 4775 * need to rebind when first used as such. 4776 */ 4777 obj->cache_level = I915_CACHE_LLC; 4778 } else 4779 obj->cache_level = I915_CACHE_NONE; 4780 4781 trace_i915_gem_object_create(obj); 4782 4783 return obj; 4784 } 4785 4786 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4787 { 4788 /* If we are the last user of the backing storage (be it shmemfs 4789 * pages or stolen etc), we know that the pages are going to be 4790 * immediately released. In this case, we can then skip copying 4791 * back the contents from the GPU. 4792 */ 4793 4794 if (obj->madv != I915_MADV_WILLNEED) 4795 return false; 4796 4797 if (obj->base.vm_obj == NULL) 4798 return true; 4799 4800 /* At first glance, this looks racy, but then again so would be 4801 * userspace racing mmap against close. However, the first external 4802 * reference to the filp can only be obtained through the 4803 * i915_gem_mmap_ioctl() which safeguards us against the user 4804 * acquiring such a reference whilst we are in the middle of 4805 * freeing the object. 4806 */ 4807 #if 0 4808 return atomic_long_read(&obj->base.filp->f_count) == 1; 4809 #else 4810 return false; 4811 #endif 4812 } 4813 4814 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4815 { 4816 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4817 struct drm_device *dev = obj->base.dev; 4818 struct drm_i915_private *dev_priv = dev->dev_private; 4819 struct i915_vma *vma, *next; 4820 4821 intel_runtime_pm_get(dev_priv); 4822 4823 trace_i915_gem_object_destroy(obj); 4824 4825 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4826 int ret; 4827 4828 vma->pin_count = 0; 4829 ret = i915_vma_unbind(vma); 4830 if (WARN_ON(ret == -ERESTARTSYS)) { 4831 bool was_interruptible; 4832 4833 was_interruptible = dev_priv->mm.interruptible; 4834 dev_priv->mm.interruptible = false; 4835 4836 WARN_ON(i915_vma_unbind(vma)); 4837 4838 dev_priv->mm.interruptible = was_interruptible; 4839 } 4840 } 4841 4842 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4843 * before progressing. 
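	 * Dropping that pin here keeps pages_pin_count consistent before
	 * i915_gem_object_put_pages() runs further below.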
*/ 4844 if (obj->stolen) 4845 i915_gem_object_unpin_pages(obj); 4846 4847 WARN_ON(obj->frontbuffer_bits); 4848 4849 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 4850 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 4851 obj->tiling_mode != I915_TILING_NONE) 4852 i915_gem_object_unpin_pages(obj); 4853 4854 if (WARN_ON(obj->pages_pin_count)) 4855 obj->pages_pin_count = 0; 4856 if (discard_backing_storage(obj)) 4857 obj->madv = I915_MADV_DONTNEED; 4858 i915_gem_object_put_pages(obj); 4859 i915_gem_object_free_mmap_offset(obj); 4860 4861 BUG_ON(obj->pages); 4862 4863 #if 0 4864 if (obj->base.import_attach) 4865 drm_prime_gem_destroy(&obj->base, NULL); 4866 #endif 4867 4868 if (obj->ops->release) 4869 obj->ops->release(obj); 4870 4871 drm_gem_object_release(&obj->base); 4872 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4873 4874 kfree(obj->bit_17); 4875 i915_gem_object_free(obj); 4876 4877 intel_runtime_pm_put(dev_priv); 4878 } 4879 4880 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4881 struct i915_address_space *vm) 4882 { 4883 struct i915_vma *vma; 4884 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4885 if (i915_is_ggtt(vma->vm) && 4886 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4887 continue; 4888 if (vma->vm == vm) 4889 return vma; 4890 } 4891 return NULL; 4892 } 4893 4894 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 4895 const struct i915_ggtt_view *view) 4896 { 4897 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 4898 struct i915_vma *vma; 4899 4900 if (WARN_ONCE(!view, "no view specified")) 4901 return ERR_PTR(-EINVAL); 4902 4903 list_for_each_entry(vma, &obj->vma_list, vma_link) 4904 if (vma->vm == ggtt && 4905 i915_ggtt_view_equal(&vma->ggtt_view, view)) 4906 return vma; 4907 return NULL; 4908 } 4909 4910 void i915_gem_vma_destroy(struct i915_vma *vma) 4911 { 4912 struct i915_address_space *vm = NULL; 4913 WARN_ON(vma->node.allocated); 4914 4915 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4916 if (!list_empty(&vma->exec_list)) 4917 return; 4918 4919 vm = vma->vm; 4920 4921 if (!i915_is_ggtt(vm)) 4922 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 4923 4924 list_del(&vma->vma_link); 4925 4926 kfree(vma); 4927 } 4928 4929 static void 4930 i915_gem_stop_ringbuffers(struct drm_device *dev) 4931 { 4932 struct drm_i915_private *dev_priv = dev->dev_private; 4933 struct intel_engine_cs *ring; 4934 int i; 4935 4936 for_each_ring(ring, dev_priv, i) 4937 dev_priv->gt.stop_ring(ring); 4938 } 4939 4940 int 4941 i915_gem_suspend(struct drm_device *dev) 4942 { 4943 struct drm_i915_private *dev_priv = dev->dev_private; 4944 int ret = 0; 4945 4946 mutex_lock(&dev->struct_mutex); 4947 ret = i915_gpu_idle(dev); 4948 if (ret) 4949 goto err; 4950 4951 i915_gem_retire_requests(dev); 4952 4953 i915_gem_stop_ringbuffers(dev); 4954 mutex_unlock(&dev->struct_mutex); 4955 4956 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 4957 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4958 #if 0 4959 flush_delayed_work(&dev_priv->mm.idle_work); 4960 #endif 4961 4962 /* Assert that we sucessfully flushed all the work and 4963 * reset the GPU back to its idle, low power state. 
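	 * (The WARN_ON on dev_priv->mm.busy below catches the case where the
	 * GPU is still marked busy at this point.)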
4964 */ 4965 WARN_ON(dev_priv->mm.busy); 4966 4967 return 0; 4968 4969 err: 4970 mutex_unlock(&dev->struct_mutex); 4971 return ret; 4972 } 4973 4974 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4975 { 4976 struct drm_device *dev = ring->dev; 4977 struct drm_i915_private *dev_priv = dev->dev_private; 4978 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4979 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4980 int i, ret; 4981 4982 if (!HAS_L3_DPF(dev) || !remap_info) 4983 return 0; 4984 4985 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4986 if (ret) 4987 return ret; 4988 4989 /* 4990 * Note: We do not worry about the concurrent register cacheline hang 4991 * here because no other code should access these registers other than 4992 * at initialization time. 4993 */ 4994 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4995 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4996 intel_ring_emit(ring, reg_base + i); 4997 intel_ring_emit(ring, remap_info[i/4]); 4998 } 4999 5000 intel_ring_advance(ring); 5001 5002 return ret; 5003 } 5004 5005 void i915_gem_init_swizzling(struct drm_device *dev) 5006 { 5007 struct drm_i915_private *dev_priv = dev->dev_private; 5008 5009 if (INTEL_INFO(dev)->gen < 5 || 5010 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5011 return; 5012 5013 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5014 DISP_TILE_SURFACE_SWIZZLING); 5015 5016 if (IS_GEN5(dev)) 5017 return; 5018 5019 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5020 if (IS_GEN6(dev)) 5021 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5022 else if (IS_GEN7(dev)) 5023 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5024 else if (IS_GEN8(dev)) 5025 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5026 else 5027 BUG(); 5028 } 5029 5030 static bool 5031 intel_enable_blt(struct drm_device *dev) 5032 { 5033 if (!HAS_BLT(dev)) 5034 return false; 5035 5036 /* The blitter was dysfunctional on early prototypes */ 5037 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 5038 DRM_INFO("BLT not supported on this pre-production hardware;" 5039 " graphics performance will be degraded.\n"); 5040 return false; 5041 } 5042 5043 return true; 5044 } 5045 5046 static void init_unused_ring(struct drm_device *dev, u32 base) 5047 { 5048 struct drm_i915_private *dev_priv = dev->dev_private; 5049 5050 I915_WRITE(RING_CTL(base), 0); 5051 I915_WRITE(RING_HEAD(base), 0); 5052 I915_WRITE(RING_TAIL(base), 0); 5053 I915_WRITE(RING_START(base), 0); 5054 } 5055 5056 static void init_unused_rings(struct drm_device *dev) 5057 { 5058 if (IS_I830(dev)) { 5059 init_unused_ring(dev, PRB1_BASE); 5060 init_unused_ring(dev, SRB0_BASE); 5061 init_unused_ring(dev, SRB1_BASE); 5062 init_unused_ring(dev, SRB2_BASE); 5063 init_unused_ring(dev, SRB3_BASE); 5064 } else if (IS_GEN2(dev)) { 5065 init_unused_ring(dev, SRB0_BASE); 5066 init_unused_ring(dev, SRB1_BASE); 5067 } else if (IS_GEN3(dev)) { 5068 init_unused_ring(dev, PRB1_BASE); 5069 init_unused_ring(dev, PRB2_BASE); 5070 } 5071 } 5072 5073 int i915_gem_init_rings(struct drm_device *dev) 5074 { 5075 struct drm_i915_private *dev_priv = dev->dev_private; 5076 int ret; 5077 5078 ret = intel_init_render_ring_buffer(dev); 5079 if (ret) 5080 return ret; 5081 5082 if (HAS_BSD(dev)) { 5083 ret = intel_init_bsd_ring_buffer(dev); 5084 if (ret) 5085 goto cleanup_render_ring; 5086 } 5087 5088 if (intel_enable_blt(dev)) { 5089 ret = intel_init_blt_ring_buffer(dev); 5090 if (ret) 5091 goto cleanup_bsd_ring; 5092 } 
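	/*
	 * The video enhancement and second BSD engines are likewise optional
	 * and only brought up when the hardware advertises them (HAS_VEBOX,
	 * HAS_BSD2); any failure from here on unwinds the rings already
	 * initialised via the cleanup labels below.
	 */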
5093 5094 if (HAS_VEBOX(dev)) { 5095 ret = intel_init_vebox_ring_buffer(dev); 5096 if (ret) 5097 goto cleanup_blt_ring; 5098 } 5099 5100 if (HAS_BSD2(dev)) { 5101 ret = intel_init_bsd2_ring_buffer(dev); 5102 if (ret) 5103 goto cleanup_vebox_ring; 5104 } 5105 5106 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 5107 if (ret) 5108 goto cleanup_bsd2_ring; 5109 5110 return 0; 5111 5112 cleanup_bsd2_ring: 5113 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 5114 cleanup_vebox_ring: 5115 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 5116 cleanup_blt_ring: 5117 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 5118 cleanup_bsd_ring: 5119 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 5120 cleanup_render_ring: 5121 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 5122 5123 return ret; 5124 } 5125 5126 int 5127 i915_gem_init_hw(struct drm_device *dev) 5128 { 5129 struct drm_i915_private *dev_priv = dev->dev_private; 5130 struct intel_engine_cs *ring; 5131 int ret, i; 5132 5133 #if 0 5134 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 5135 return -EIO; 5136 #endif 5137 5138 /* Double layer security blanket, see i915_gem_init() */ 5139 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5140 5141 if (dev_priv->ellc_size) 5142 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5143 5144 if (IS_HASWELL(dev)) 5145 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5146 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5147 5148 if (HAS_PCH_NOP(dev)) { 5149 if (IS_IVYBRIDGE(dev)) { 5150 u32 temp = I915_READ(GEN7_MSG_CTL); 5151 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5152 I915_WRITE(GEN7_MSG_CTL, temp); 5153 } else if (INTEL_INFO(dev)->gen >= 7) { 5154 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5155 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5156 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5157 } 5158 } 5159 5160 i915_gem_init_swizzling(dev); 5161 5162 /* 5163 * At least 830 can leave some of the unused rings 5164 * "active" (ie. head != tail) after resume which 5165 * will prevent c3 entry. Makes sure all unused rings 5166 * are totally idle. 
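	 * (init_unused_rings() below simply zeroes the control, head, tail
	 * and start registers of any such ring.)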
5167 */ 5168 init_unused_rings(dev); 5169 5170 for_each_ring(ring, dev_priv, i) { 5171 ret = ring->init_hw(ring); 5172 if (ret) 5173 goto out; 5174 } 5175 5176 for (i = 0; i < NUM_L3_SLICES(dev); i++) 5177 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 5178 5179 ret = i915_ppgtt_init_hw(dev); 5180 if (ret && ret != -EIO) { 5181 DRM_ERROR("PPGTT enable failed %d\n", ret); 5182 i915_gem_cleanup_ringbuffer(dev); 5183 } 5184 5185 ret = i915_gem_context_enable(dev_priv); 5186 if (ret && ret != -EIO) { 5187 DRM_ERROR("Context enable failed %d\n", ret); 5188 i915_gem_cleanup_ringbuffer(dev); 5189 5190 goto out; 5191 } 5192 5193 out: 5194 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5195 return ret; 5196 } 5197 5198 int i915_gem_init(struct drm_device *dev) 5199 { 5200 struct drm_i915_private *dev_priv = dev->dev_private; 5201 int ret; 5202 5203 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5204 i915.enable_execlists); 5205 5206 mutex_lock(&dev->struct_mutex); 5207 5208 if (IS_VALLEYVIEW(dev)) { 5209 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5210 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5211 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5212 VLV_GTLC_ALLOWWAKEACK), 10)) 5213 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5214 } 5215 5216 if (!i915.enable_execlists) { 5217 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5218 dev_priv->gt.init_rings = i915_gem_init_rings; 5219 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5220 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5221 } else { 5222 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5223 dev_priv->gt.init_rings = intel_logical_rings_init; 5224 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5225 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5226 } 5227 5228 /* This is just a security blanket to placate dragons. 5229 * On some systems, we very sporadically observe that the first TLBs 5230 * used by the CS may be stale, despite us poking the TLB reset. If 5231 * we hold the forcewake during initialisation these problems 5232 * just magically go away. 5233 */ 5234 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5235 5236 ret = i915_gem_init_userptr(dev); 5237 if (ret) 5238 goto out_unlock; 5239 5240 i915_gem_init_global_gtt(dev); 5241 5242 ret = i915_gem_context_init(dev); 5243 if (ret) 5244 goto out_unlock; 5245 5246 ret = dev_priv->gt.init_rings(dev); 5247 if (ret) 5248 goto out_unlock; 5249 5250 ret = i915_gem_init_hw(dev); 5251 if (ret == -EIO) { 5252 /* Allow ring initialisation to fail by marking the GPU as 5253 * wedged. But we only want to do this where the GPU is angry, 5254 * for all other failure, such as an allocation failure, bail. 
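	 * Setting I915_WEDGED in the reset counter below is what marks the
	 * GPU as wedged; the -EIO is then dropped so that driver load can
	 * continue.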
5255 */ 5256 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5257 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5258 ret = 0; 5259 } 5260 5261 out_unlock: 5262 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5263 mutex_unlock(&dev->struct_mutex); 5264 5265 return ret; 5266 } 5267 5268 void 5269 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5270 { 5271 struct drm_i915_private *dev_priv = dev->dev_private; 5272 struct intel_engine_cs *ring; 5273 int i; 5274 5275 for_each_ring(ring, dev_priv, i) 5276 dev_priv->gt.cleanup_ring(ring); 5277 } 5278 5279 static void 5280 init_ring_lists(struct intel_engine_cs *ring) 5281 { 5282 INIT_LIST_HEAD(&ring->active_list); 5283 INIT_LIST_HEAD(&ring->request_list); 5284 } 5285 5286 void i915_init_vm(struct drm_i915_private *dev_priv, 5287 struct i915_address_space *vm) 5288 { 5289 if (!i915_is_ggtt(vm)) 5290 drm_mm_init(&vm->mm, vm->start, vm->total); 5291 vm->dev = dev_priv->dev; 5292 INIT_LIST_HEAD(&vm->active_list); 5293 INIT_LIST_HEAD(&vm->inactive_list); 5294 INIT_LIST_HEAD(&vm->global_link); 5295 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5296 } 5297 5298 void 5299 i915_gem_load(struct drm_device *dev) 5300 { 5301 struct drm_i915_private *dev_priv = dev->dev_private; 5302 int i; 5303 5304 INIT_LIST_HEAD(&dev_priv->vm_list); 5305 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5306 5307 INIT_LIST_HEAD(&dev_priv->context_list); 5308 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5309 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5310 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5311 for (i = 0; i < I915_NUM_RINGS; i++) 5312 init_ring_lists(&dev_priv->ring[i]); 5313 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5314 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5315 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5316 i915_gem_retire_work_handler); 5317 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5318 i915_gem_idle_work_handler); 5319 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5320 5321 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5322 5323 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5324 dev_priv->num_fence_regs = 32; 5325 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5326 dev_priv->num_fence_regs = 16; 5327 else 5328 dev_priv->num_fence_regs = 8; 5329 5330 if (intel_vgpu_active(dev)) 5331 dev_priv->num_fence_regs = 5332 I915_READ(vgtif_reg(avail_rs.fence_num)); 5333 5334 /* Initialize fence registers to zero */ 5335 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5336 i915_gem_restore_fences(dev); 5337 5338 i915_gem_detect_bit_6_swizzle(dev); 5339 init_waitqueue_head(&dev_priv->pending_flip_queue); 5340 5341 dev_priv->mm.interruptible = true; 5342 5343 i915_gem_shrinker_init(dev_priv); 5344 5345 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, LK_CANRECURSE); 5346 } 5347 5348 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5349 { 5350 struct drm_i915_file_private *file_priv = file->driver_priv; 5351 5352 /* Clean up our request list when the client is going away, so that 5353 * later retire_requests won't dereference our soon-to-be-gone 5354 * file_priv. 
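	 *
	 * The requests themselves are not cancelled here; they are merely
	 * unlinked from the client and have their file_priv pointer cleared.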
5355 */ 5356 spin_lock(&file_priv->mm.lock); 5357 while (!list_empty(&file_priv->mm.request_list)) { 5358 struct drm_i915_gem_request *request; 5359 5360 request = list_first_entry(&file_priv->mm.request_list, 5361 struct drm_i915_gem_request, 5362 client_list); 5363 list_del(&request->client_list); 5364 request->file_priv = NULL; 5365 } 5366 spin_unlock(&file_priv->mm.lock); 5367 5368 if (!list_empty(&file_priv->rps.link)) { 5369 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 5370 list_del(&file_priv->rps.link); 5371 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 5372 } 5373 } 5374 5375 int 5376 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5377 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5378 { 5379 *color = 0; /* XXXKIB */ 5380 return (0); 5381 } 5382 5383 void 5384 i915_gem_pager_dtor(void *handle) 5385 { 5386 struct drm_gem_object *obj; 5387 struct drm_device *dev; 5388 5389 obj = handle; 5390 dev = obj->dev; 5391 5392 mutex_lock(&dev->struct_mutex); 5393 drm_gem_free_mmap_offset(obj); 5394 i915_gem_release_mmap(to_intel_bo(obj)); 5395 drm_gem_object_unreference(obj); 5396 mutex_unlock(&dev->struct_mutex); 5397 } 5398 5399 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5400 { 5401 struct drm_i915_file_private *file_priv; 5402 int ret; 5403 5404 DRM_DEBUG_DRIVER("\n"); 5405 5406 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5407 if (!file_priv) 5408 return -ENOMEM; 5409 5410 file->driver_priv = file_priv; 5411 file_priv->dev_priv = dev->dev_private; 5412 file_priv->file = file; 5413 INIT_LIST_HEAD(&file_priv->rps.link); 5414 5415 spin_init(&file_priv->mm.lock, "i915_priv"); 5416 INIT_LIST_HEAD(&file_priv->mm.request_list); 5417 5418 ret = i915_gem_context_open(dev, file); 5419 if (ret) 5420 kfree(file_priv); 5421 5422 return ret; 5423 } 5424 5425 /** 5426 * i915_gem_track_fb - update frontbuffer tracking 5427 * old: current GEM buffer for the frontbuffer slots 5428 * new: new GEM buffer for the frontbuffer slots 5429 * frontbuffer_bits: bitmask of frontbuffer slots 5430 * 5431 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5432 * from @old and setting them in @new. Both @old and @new can be NULL. 5433 */ 5434 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5435 struct drm_i915_gem_object *new, 5436 unsigned frontbuffer_bits) 5437 { 5438 if (old) { 5439 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5440 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5441 old->frontbuffer_bits &= ~frontbuffer_bits; 5442 } 5443 5444 if (new) { 5445 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5446 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5447 new->frontbuffer_bits |= frontbuffer_bits; 5448 } 5449 } 5450 5451 /* All the new VM stuff */ 5452 unsigned long 5453 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5454 struct i915_address_space *vm) 5455 { 5456 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5457 struct i915_vma *vma; 5458 5459 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5460 5461 list_for_each_entry(vma, &o->vma_list, vma_link) { 5462 if (i915_is_ggtt(vma->vm) && 5463 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5464 continue; 5465 if (vma->vm == vm) 5466 return vma->node.start; 5467 } 5468 5469 WARN(1, "%s vma for this object not found.\n", 5470 i915_is_ggtt(vm) ? 
"global" : "ppgtt"); 5471 return -1; 5472 } 5473 5474 unsigned long 5475 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5476 const struct i915_ggtt_view *view) 5477 { 5478 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5479 struct i915_vma *vma; 5480 5481 list_for_each_entry(vma, &o->vma_list, vma_link) 5482 if (vma->vm == ggtt && 5483 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5484 return vma->node.start; 5485 5486 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5487 return -1; 5488 } 5489 5490 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5491 struct i915_address_space *vm) 5492 { 5493 struct i915_vma *vma; 5494 5495 list_for_each_entry(vma, &o->vma_list, vma_link) { 5496 if (i915_is_ggtt(vma->vm) && 5497 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5498 continue; 5499 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5500 return true; 5501 } 5502 5503 return false; 5504 } 5505 5506 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5507 const struct i915_ggtt_view *view) 5508 { 5509 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5510 struct i915_vma *vma; 5511 5512 list_for_each_entry(vma, &o->vma_list, vma_link) 5513 if (vma->vm == ggtt && 5514 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5515 drm_mm_node_allocated(&vma->node)) 5516 return true; 5517 5518 return false; 5519 } 5520 5521 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5522 { 5523 struct i915_vma *vma; 5524 5525 list_for_each_entry(vma, &o->vma_list, vma_link) 5526 if (drm_mm_node_allocated(&vma->node)) 5527 return true; 5528 5529 return false; 5530 } 5531 5532 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5533 struct i915_address_space *vm) 5534 { 5535 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5536 struct i915_vma *vma; 5537 5538 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5539 5540 BUG_ON(list_empty(&o->vma_list)); 5541 5542 list_for_each_entry(vma, &o->vma_list, vma_link) { 5543 if (i915_is_ggtt(vma->vm) && 5544 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5545 continue; 5546 if (vma->vm == vm) 5547 return vma->node.size; 5548 } 5549 return 0; 5550 } 5551 5552 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5553 { 5554 struct i915_vma *vma; 5555 list_for_each_entry(vma, &obj->vma_list, vma_link) 5556 if (vma->pin_count > 0) 5557 return true; 5558 5559 return false; 5560 } 5561 5562