/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_dmabuf.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

#include <sys/mman.h>
#include <vm/vm_map.h>
#include <vm/vm_param.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static int
insert_mappable_node(struct drm_i915_private *i915,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
						   size, 0, 0, 0,
						   i915->ggtt.mappable_end,
						   DRM_MM_SEARCH_DEFAULT,
						   DRM_MM_CREATE_DEFAULT);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	lockmgr(&dev_priv->mm.object_stat_lock, LK_EXCLUSIVE);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	lockmgr(&dev_priv->mm.object_stat_lock, LK_RELEASE);
}
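/*
 * i915_gem_wait_for_error - wait for any pending GPU reset to complete
 *
 * Used by i915_mutex_lock_interruptible() below so that callers do not take
 * struct_mutex while a reset is still being processed.  The wait is
 * interruptible and bounded to 10 seconds, after which we give up and
 * return -EIO rather than hang userspace.
 */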
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	if (!i915_reset_in_progress(error))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_in_progress(error),
					       10*HZ);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
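#if 0
/*
 * Illustrative sketch only, not compiled here: how userspace typically
 * consumes the aperture ioctl handled above, using libdrm's drmIoctl()
 * wrapper.  The helper name is made up for the example.
 */
#include <xf86drm.h>
#include <i915_drm.h>

static uint64_t query_available_aperture(int fd)
{
	struct drm_i915_gem_get_aperture aper = { 0 };

	/* aper_available_size is the total GGTT size minus pinned objects */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper))
		return 0;

	return aper.aper_available_size;
}
#endif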
#if 0
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
	char *vaddr = obj->phys_handle->vaddr;
	struct sg_table *st;
	struct scatterlist *sg;
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

		put_page(page);
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (WARN_ON(ret)) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

	sg_free_table(obj->pages);
	kfree(obj->pages);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}
#endif

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
#if 0
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
#endif
};

int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	LINUX_LIST_HEAD(still_in_list);
	int ret = 0;	/* the vma list may already be empty */

	/* The vma will only be freed if it is marked as closed, and if we wait
	 * upon rendering to the vma, we may unbind anything in the list.
	 */
	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 * @obj: i915 gem object
 * @readonly: waiting for just read access or read-write access
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct reservation_object *resv;
	struct i915_gem_active *active;
	unsigned long active_mask;
	int idx;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!readonly) {
		active = obj->last_read;
		active_mask = i915_gem_object_get_active(obj);
	} else {
		active_mask = 1;
		active = &obj->last_write;
	}

	for_each_active(active_mask, idx) {
		int ret;

		ret = i915_gem_active_wait(&active[idx],
					   &obj->base.dev->struct_mutex);
		if (ret)
			return ret;
	}

	resv = i915_gem_object_get_dmabuf_resv(obj);
	if (resv) {
		long err;

		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
							  MAX_SCHEDULE_TIMEOUT);
		if (err < 0)
			return err;
	}

	return 0;
}

/* A nonblocking variant of the above wait. Must be called prior to
 * acquiring the mutex for the object, as the object state may change
 * during this call. A reference must be held by the caller for the object.
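 *
 * When waiting read-only, only the last write must complete, hence the
 * active_mask of 1 selecting last_write; otherwise every engine set in
 * __I915_BO_ACTIVE() is waited upon.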
363 */ 364 static __must_check int 365 __unsafe_wait_rendering(struct drm_i915_gem_object *obj, 366 struct intel_rps_client *rps, 367 bool readonly) 368 { 369 struct i915_gem_active *active; 370 unsigned long active_mask; 371 int idx; 372 373 active_mask = __I915_BO_ACTIVE(obj); 374 if (!active_mask) 375 return 0; 376 377 if (!readonly) { 378 active = obj->last_read; 379 } else { 380 active_mask = 1; 381 active = &obj->last_write; 382 } 383 384 for_each_active(active_mask, idx) { 385 int ret; 386 387 ret = i915_gem_active_wait_unlocked(&active[idx], 388 true, NULL, rps); 389 if (ret) 390 return ret; 391 } 392 393 return 0; 394 } 395 396 static struct intel_rps_client *to_rps_client(struct drm_file *file) 397 { 398 struct drm_i915_file_private *fpriv = file->driver_priv; 399 400 return &fpriv->rps; 401 } 402 403 int 404 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 405 int align) 406 { 407 drm_dma_handle_t *phys; 408 int ret; 409 410 if (obj->phys_handle) { 411 if ((unsigned long)obj->phys_handle->vaddr & (align -1)) 412 return -EBUSY; 413 414 return 0; 415 } 416 417 if (obj->madv != I915_MADV_WILLNEED) 418 return -EFAULT; 419 420 if (obj->base.filp == NULL) 421 return -EINVAL; 422 423 ret = i915_gem_object_unbind(obj); 424 if (ret) 425 return ret; 426 427 ret = i915_gem_object_put_pages(obj); 428 if (ret) 429 return ret; 430 431 /* create a new object */ 432 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 433 if (!phys) 434 return -ENOMEM; 435 436 obj->phys_handle = phys; 437 obj->ops = &i915_gem_phys_ops; 438 439 return i915_gem_object_get_pages(obj); 440 } 441 442 static int 443 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 444 struct drm_i915_gem_pwrite *args, 445 struct drm_file *file_priv) 446 { 447 struct drm_device *dev = obj->base.dev; 448 void *vaddr = obj->phys_handle->vaddr + args->offset; 449 char __user *user_data = u64_to_user_ptr(args->data_ptr); 450 int ret = 0; 451 452 /* We manually control the domain here and pretend that it 453 * remains coherent i.e. in the GTT domain, like shmem_pwrite. 454 */ 455 ret = i915_gem_object_wait_rendering(obj, false); 456 if (ret) 457 return ret; 458 459 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 460 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 461 unsigned long unwritten; 462 463 /* The physical object once assigned is fixed for the lifetime 464 * of the obj, so we can safely drop the lock and continue 465 * to access vaddr. 
466 */ 467 mutex_unlock(&dev->struct_mutex); 468 unwritten = copy_from_user(vaddr, user_data, args->size); 469 mutex_lock(&dev->struct_mutex); 470 if (unwritten) { 471 ret = -EFAULT; 472 goto out; 473 } 474 } 475 476 drm_clflush_virt_range(vaddr, args->size); 477 i915_gem_chipset_flush(to_i915(dev)); 478 479 out: 480 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 481 return ret; 482 } 483 484 void *i915_gem_object_alloc(struct drm_device *dev) 485 { 486 return kzalloc(sizeof(struct drm_i915_gem_object), GFP_KERNEL); 487 } 488 489 void i915_gem_object_free(struct drm_i915_gem_object *obj) 490 { 491 kfree(obj); 492 } 493 494 static int 495 i915_gem_create(struct drm_file *file, 496 struct drm_device *dev, 497 uint64_t size, 498 uint32_t *handle_p) 499 { 500 struct drm_i915_gem_object *obj; 501 int ret; 502 u32 handle; 503 504 size = roundup(size, PAGE_SIZE); 505 if (size == 0) 506 return -EINVAL; 507 508 /* Allocate the new object */ 509 obj = i915_gem_object_create(dev, size); 510 if (IS_ERR(obj)) 511 return PTR_ERR(obj); 512 513 ret = drm_gem_handle_create(file, &obj->base, &handle); 514 /* drop reference from allocate - handle holds it now */ 515 i915_gem_object_put_unlocked(obj); 516 if (ret) 517 return ret; 518 519 *handle_p = handle; 520 return 0; 521 } 522 523 int 524 i915_gem_dumb_create(struct drm_file *file, 525 struct drm_device *dev, 526 struct drm_mode_create_dumb *args) 527 { 528 /* have to work out size/pitch and return them */ 529 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 530 args->size = args->pitch * args->height; 531 return i915_gem_create(file, dev, 532 args->size, &args->handle); 533 } 534 535 /** 536 * Creates a new mm object and returns a handle to it. 537 * @dev: drm device pointer 538 * @data: ioctl data blob 539 * @file: drm file pointer 540 */ 541 int 542 i915_gem_create_ioctl(struct drm_device *dev, void *data, 543 struct drm_file *file) 544 { 545 struct drm_i915_gem_create *args = data; 546 547 return i915_gem_create(file, dev, 548 args->size, &args->handle); 549 } 550 551 static inline int 552 __copy_to_user_swizzled(char __user *cpu_vaddr, 553 const char *gpu_vaddr, int gpu_offset, 554 int length) 555 { 556 int ret, cpu_offset = 0; 557 558 while (length > 0) { 559 int cacheline_end = ALIGN(gpu_offset + 1, 64); 560 int this_length = min(cacheline_end - gpu_offset, length); 561 int swizzled_gpu_offset = gpu_offset ^ 64; 562 563 ret = __copy_to_user(cpu_vaddr + cpu_offset, 564 gpu_vaddr + swizzled_gpu_offset, 565 this_length); 566 if (ret) 567 return ret + length; 568 569 cpu_offset += this_length; 570 gpu_offset += this_length; 571 length -= this_length; 572 } 573 574 return 0; 575 } 576 577 static inline int 578 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 579 const char __user *cpu_vaddr, 580 int length) 581 { 582 int ret, cpu_offset = 0; 583 584 while (length > 0) { 585 int cacheline_end = ALIGN(gpu_offset + 1, 64); 586 int this_length = min(cacheline_end - gpu_offset, length); 587 int swizzled_gpu_offset = gpu_offset ^ 64; 588 589 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 590 cpu_vaddr + cpu_offset, 591 this_length); 592 if (ret) 593 return ret + length; 594 595 cpu_offset += this_length; 596 gpu_offset += this_length; 597 length -= this_length; 598 } 599 600 return 0; 601 } 602 603 /* 604 * Pins the specified object's pages and synchronizes the object with 605 * GPU accesses. Sets needs_clflush to non-zero if the caller should 606 * flush the object from the CPU cache. 
607 */ 608 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 609 int *needs_clflush) 610 { 611 int ret; 612 613 *needs_clflush = 0; 614 615 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 616 return -EINVAL; 617 618 ret = i915_gem_object_wait_rendering(obj, true); 619 if (ret) 620 return ret; 621 622 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 623 /* If we're not in the cpu read domain, set ourself into the gtt 624 * read domain and manually flush cachelines (if required). This 625 * optimizes for the case when the gpu will dirty the data 626 * anyway again before the next pread happens. */ 627 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 628 obj->cache_level); 629 } 630 631 ret = i915_gem_object_get_pages(obj); 632 if (ret) 633 return ret; 634 635 i915_gem_object_pin_pages(obj); 636 637 return ret; 638 } 639 640 /* Per-page copy function for the shmem pread fastpath. 641 * Flushes invalid cachelines before reading the target if 642 * needs_clflush is set. */ 643 static int 644 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 645 char __user *user_data, 646 bool page_do_bit17_swizzling, bool needs_clflush) 647 { 648 char *vaddr; 649 int ret; 650 651 if (unlikely(page_do_bit17_swizzling)) 652 return -EINVAL; 653 654 vaddr = kmap_atomic(page); 655 if (needs_clflush) 656 drm_clflush_virt_range(vaddr + shmem_page_offset, 657 page_length); 658 ret = __copy_to_user_inatomic(user_data, 659 vaddr + shmem_page_offset, 660 page_length); 661 kunmap_atomic(vaddr); 662 663 return ret ? -EFAULT : 0; 664 } 665 666 static void 667 shmem_clflush_swizzled_range(char *addr, unsigned long length, 668 bool swizzled) 669 { 670 if (unlikely(swizzled)) { 671 unsigned long start = (unsigned long) addr; 672 unsigned long end = (unsigned long) addr + length; 673 674 /* For swizzling simply ensure that we always flush both 675 * channels. Lame, but simple and it works. Swizzled 676 * pwrite/pread is far from a hotpath - current userspace 677 * doesn't use it at all. */ 678 start = round_down(start, 128); 679 end = round_up(end, 128); 680 681 drm_clflush_virt_range((void *)start, end - start); 682 } else { 683 drm_clflush_virt_range(addr, length); 684 } 685 686 } 687 688 /* Only difference to the fast-path function is that this can handle bit17 689 * and uses non-atomic copy and kmap functions. */ 690 static int 691 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 692 char __user *user_data, 693 bool page_do_bit17_swizzling, bool needs_clflush) 694 { 695 char *vaddr; 696 int ret; 697 698 vaddr = kmap(page); 699 if (needs_clflush) 700 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 701 page_length, 702 page_do_bit17_swizzling); 703 704 if (page_do_bit17_swizzling) 705 ret = __copy_to_user_swizzled(user_data, 706 vaddr, shmem_page_offset, 707 page_length); 708 else 709 ret = __copy_to_user(user_data, 710 vaddr + shmem_page_offset, 711 page_length); 712 kunmap(page); 713 714 return ret ? - EFAULT : 0; 715 } 716 717 static inline unsigned long 718 slow_user_access(struct io_mapping *mapping, 719 uint64_t page_base, int page_offset, 720 char __user *user_data, 721 unsigned long length, bool pwrite) 722 { 723 void __iomem *ioaddr; 724 void *vaddr; 725 uint64_t unwritten; 726 727 ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); 728 /* We can use the cpu mem copy function because this is X86. 
	 */
	vaddr = (void __force *)ioaddr + page_offset;
	if (pwrite)
		unwritten = __copy_from_user(vaddr, user_data, length);
	else
		unwritten = __copy_to_user(user_data, vaddr, length);

	io_mapping_unmap(ioaddr);
	return unwritten;
}

static int
i915_gem_gtt_pread(struct drm_device *dev,
		   struct drm_i915_gem_object *obj, uint64_t size,
		   uint64_t data_offset, uint64_t data_ptr)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_mm_node node;
	char __user *user_data;
	uint64_t remain;
	uint64_t offset;
	int ret;

	ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (ret) {
		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
		if (ret)
			goto out;

		ret = i915_gem_object_get_pages(obj);
		if (ret) {
			remove_mappable_node(&node);
			goto out;
		}

		i915_gem_object_pin_pages(obj);
	} else {
		node.start = i915_gem_obj_ggtt_offset(obj);
		node.allocated = false;
		ret = i915_gem_object_put_fence(obj);
		if (ret)
			goto out_unpin;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	user_data = u64_to_user_ptr(data_ptr);
	remain = size;
	offset = data_offset;

	mutex_unlock(&dev->struct_mutex);
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_writeable(user_data, remain);
		if (ret) {
			mutex_lock(&dev->struct_mutex);
			goto out_unpin;
		}
	}

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
			ggtt->base.insert_page(&ggtt->base,
					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					       node.start,
					       I915_CACHE_NONE, 0);
			wmb();
		} else {
			/* Align the base down to the page with the Linux-style
			 * LINUX_PAGE_MASK, matching the pwrite fast path below.
			 */
			page_base += offset & LINUX_PAGE_MASK;
		}
		/* This is a slow read/write as it tries to read from
		 * and write to user memory which may result into page
		 * faults, and so we cannot perform this under struct_mutex.
		 */
		if (slow_user_access(ggtt->mappable, page_base,
				     page_offset, user_data,
				     page_length, false)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&dev->struct_mutex);
	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
		/* The user has modified the object whilst we tried
		 * reading from it, and we now have no idea what domain
		 * the pages should be in. As we have just been touching
		 * them directly, flush everything back to the GTT
		 * domain.
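		 * The pwrite fast path below applies the same fixup whenever
		 * its slow fallback was taken.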
834 */ 835 ret = i915_gem_object_set_to_gtt_domain(obj, false); 836 } 837 838 out_unpin: 839 if (node.allocated) { 840 wmb(); 841 ggtt->base.clear_range(&ggtt->base, 842 node.start, node.size, 843 true); 844 i915_gem_object_unpin_pages(obj); 845 remove_mappable_node(&node); 846 } else { 847 i915_gem_object_ggtt_unpin(obj); 848 } 849 out: 850 return ret; 851 } 852 853 static int 854 i915_gem_shmem_pread(struct drm_device *dev, 855 struct drm_i915_gem_object *obj, 856 struct drm_i915_gem_pread *args, 857 struct drm_file *file) 858 { 859 char __user *user_data; 860 ssize_t remain; 861 loff_t offset; 862 int shmem_page_offset, page_length, ret = 0; 863 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 864 int prefaulted = 0; 865 int needs_clflush = 0; 866 struct sg_page_iter sg_iter; 867 868 if (!i915_gem_object_has_struct_page(obj)) 869 return -ENODEV; 870 871 user_data = u64_to_user_ptr(args->data_ptr); 872 remain = args->size; 873 874 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 875 876 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 877 if (ret) 878 return ret; 879 880 offset = args->offset; 881 882 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 883 offset >> PAGE_SHIFT) { 884 struct page *page = sg_page_iter_page(&sg_iter); 885 886 if (remain <= 0) 887 break; 888 889 /* Operation in this page 890 * 891 * shmem_page_offset = offset within page in shmem file 892 * page_length = bytes to copy for this page 893 */ 894 shmem_page_offset = offset_in_page(offset); 895 page_length = remain; 896 if ((shmem_page_offset + page_length) > PAGE_SIZE) 897 page_length = PAGE_SIZE - shmem_page_offset; 898 899 page_do_bit17_swizzling = obj_do_bit17_swizzling && 900 (page_to_phys(page) & (1 << 17)) != 0; 901 902 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 903 user_data, page_do_bit17_swizzling, 904 needs_clflush); 905 if (ret == 0) 906 goto next_page; 907 908 mutex_unlock(&dev->struct_mutex); 909 910 if (likely(!i915.prefault_disable) && !prefaulted) { 911 ret = fault_in_multipages_writeable(user_data, remain); 912 /* Userspace is tricking us, but we've already clobbered 913 * its pages with the prefault and promised to write the 914 * data up to the first fault. Hence ignore any errors 915 * and just continue. */ 916 (void)ret; 917 prefaulted = 1; 918 } 919 920 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 921 user_data, page_do_bit17_swizzling, 922 needs_clflush); 923 924 mutex_lock(&dev->struct_mutex); 925 926 if (ret) 927 goto out; 928 929 next_page: 930 remain -= page_length; 931 user_data += page_length; 932 offset += page_length; 933 } 934 935 out: 936 i915_gem_object_unpin_pages(obj); 937 938 return ret; 939 } 940 941 /** 942 * Reads data from the object referenced by handle. 943 * @dev: drm device pointer 944 * @data: ioctl data blob 945 * @file: drm file pointer 946 * 947 * On error, the contents of *data are undefined. 948 */ 949 int 950 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 951 struct drm_file *file) 952 { 953 struct drm_i915_gem_pread *args = data; 954 struct drm_i915_gem_object *obj; 955 int ret = 0; 956 957 if (args->size == 0) 958 return 0; 959 960 #if 0 961 if (!access_ok(VERIFY_WRITE, 962 u64_to_user_ptr(args->data_ptr), 963 args->size)) 964 return -EFAULT; 965 #endif 966 967 obj = i915_gem_object_lookup(file, args->handle); 968 if (!obj) 969 return -ENOENT; 970 971 /* Bounds check source. 
*/ 972 if (args->offset > obj->base.size || 973 args->size > obj->base.size - args->offset) { 974 ret = -EINVAL; 975 goto err; 976 } 977 978 trace_i915_gem_object_pread(obj, args->offset, args->size); 979 980 ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); 981 if (ret) 982 goto err; 983 984 ret = i915_mutex_lock_interruptible(dev); 985 if (ret) 986 goto err; 987 988 ret = i915_gem_shmem_pread(dev, obj, args, file); 989 990 /* pread for non shmem backed objects */ 991 if (ret == -EFAULT || ret == -ENODEV) { 992 intel_runtime_pm_get(to_i915(dev)); 993 ret = i915_gem_gtt_pread(dev, obj, args->size, 994 args->offset, args->data_ptr); 995 intel_runtime_pm_put(to_i915(dev)); 996 } 997 998 i915_gem_object_put(obj); 999 mutex_unlock(&dev->struct_mutex); 1000 1001 return ret; 1002 1003 err: 1004 i915_gem_object_put_unlocked(obj); 1005 return ret; 1006 } 1007 1008 /* This is the fast write path which cannot handle 1009 * page faults in the source data 1010 */ 1011 1012 static inline int 1013 fast_user_write(struct io_mapping *mapping, 1014 loff_t page_base, int page_offset, 1015 char __user *user_data, 1016 int length) 1017 { 1018 void __iomem *vaddr_atomic; 1019 void *vaddr; 1020 unsigned long unwritten; 1021 1022 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 1023 /* We can use the cpu mem copy function because this is X86. */ 1024 vaddr = (void __force*)vaddr_atomic + page_offset; 1025 unwritten = __copy_from_user_inatomic_nocache(vaddr, 1026 user_data, length); 1027 io_mapping_unmap_atomic(vaddr_atomic); 1028 return unwritten; 1029 } 1030 1031 /** 1032 * This is the fast pwrite path, where we copy the data directly from the 1033 * user into the GTT, uncached. 1034 * @i915: i915 device private data 1035 * @obj: i915 gem object 1036 * @args: pwrite arguments structure 1037 * @file: drm file pointer 1038 */ 1039 static int 1040 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, 1041 struct drm_i915_gem_object *obj, 1042 struct drm_i915_gem_pwrite *args, 1043 struct drm_file *file) 1044 { 1045 struct i915_ggtt *ggtt = &i915->ggtt; 1046 struct drm_device *dev = obj->base.dev; 1047 struct drm_mm_node node; 1048 uint64_t remain, offset; 1049 char __user *user_data; 1050 int ret; 1051 bool hit_slow_path = false; 1052 1053 if (i915_gem_object_is_tiled(obj)) 1054 return -EFAULT; 1055 1056 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 1057 PIN_MAPPABLE | PIN_NONBLOCK); 1058 if (ret) { 1059 ret = insert_mappable_node(i915, &node, PAGE_SIZE); 1060 if (ret) 1061 goto out; 1062 1063 ret = i915_gem_object_get_pages(obj); 1064 if (ret) { 1065 remove_mappable_node(&node); 1066 goto out; 1067 } 1068 1069 i915_gem_object_pin_pages(obj); 1070 } else { 1071 node.start = i915_gem_obj_ggtt_offset(obj); 1072 node.allocated = false; 1073 ret = i915_gem_object_put_fence(obj); 1074 if (ret) 1075 goto out_unpin; 1076 } 1077 1078 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1079 if (ret) 1080 goto out_unpin; 1081 1082 intel_fb_obj_invalidate(obj, ORIGIN_GTT); 1083 obj->dirty = true; 1084 1085 user_data = u64_to_user_ptr(args->data_ptr); 1086 offset = args->offset; 1087 remain = args->size; 1088 while (remain) { 1089 /* Operation in this page 1090 * 1091 * page_base = page offset within aperture 1092 * page_offset = offset within page 1093 * page_length = bytes to copy for this page 1094 */ 1095 u32 page_base = node.start; 1096 unsigned page_offset = offset_in_page(offset); 1097 unsigned page_length = PAGE_SIZE - page_offset; 1098 page_length = remain < page_length ? 
remain : page_length; 1099 if (node.allocated) { 1100 wmb(); /* flush the write before we modify the GGTT */ 1101 ggtt->base.insert_page(&ggtt->base, 1102 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), 1103 node.start, I915_CACHE_NONE, 0); 1104 wmb(); /* flush modifications to the GGTT (insert_page) */ 1105 } else { 1106 page_base += offset & LINUX_PAGE_MASK; 1107 } 1108 /* If we get a fault while copying data, then (presumably) our 1109 * source page isn't available. Return the error and we'll 1110 * retry in the slow path. 1111 * If the object is non-shmem backed, we retry again with the 1112 * path that handles page fault. 1113 */ 1114 if (fast_user_write(ggtt->mappable, page_base, 1115 page_offset, user_data, page_length)) { 1116 hit_slow_path = true; 1117 mutex_unlock(&dev->struct_mutex); 1118 if (slow_user_access(ggtt->mappable, 1119 page_base, 1120 page_offset, user_data, 1121 page_length, true)) { 1122 ret = -EFAULT; 1123 mutex_lock(&dev->struct_mutex); 1124 goto out_flush; 1125 } 1126 1127 mutex_lock(&dev->struct_mutex); 1128 } 1129 1130 remain -= page_length; 1131 user_data += page_length; 1132 offset += page_length; 1133 } 1134 1135 out_flush: 1136 if (hit_slow_path) { 1137 if (ret == 0 && 1138 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { 1139 /* The user has modified the object whilst we tried 1140 * reading from it, and we now have no idea what domain 1141 * the pages should be in. As we have just been touching 1142 * them directly, flush everything back to the GTT 1143 * domain. 1144 */ 1145 ret = i915_gem_object_set_to_gtt_domain(obj, false); 1146 } 1147 } 1148 1149 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 1150 out_unpin: 1151 if (node.allocated) { 1152 wmb(); 1153 ggtt->base.clear_range(&ggtt->base, 1154 node.start, node.size, 1155 true); 1156 i915_gem_object_unpin_pages(obj); 1157 remove_mappable_node(&node); 1158 } else { 1159 i915_gem_object_ggtt_unpin(obj); 1160 } 1161 out: 1162 return ret; 1163 } 1164 1165 /* Per-page copy function for the shmem pwrite fastpath. 1166 * Flushes invalid cachelines before writing to the target if 1167 * needs_clflush_before is set and flushes out any written cachelines after 1168 * writing if needs_clflush is set. */ 1169 static int 1170 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 1171 char __user *user_data, 1172 bool page_do_bit17_swizzling, 1173 bool needs_clflush_before, 1174 bool needs_clflush_after) 1175 { 1176 char *vaddr; 1177 int ret; 1178 1179 if (unlikely(page_do_bit17_swizzling)) 1180 return -EINVAL; 1181 1182 vaddr = kmap_atomic(page); 1183 if (needs_clflush_before) 1184 drm_clflush_virt_range(vaddr + shmem_page_offset, 1185 page_length); 1186 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 1187 user_data, page_length); 1188 if (needs_clflush_after) 1189 drm_clflush_virt_range(vaddr + shmem_page_offset, 1190 page_length); 1191 kunmap_atomic(vaddr); 1192 1193 return ret ? -EFAULT : 0; 1194 } 1195 1196 /* Only difference to the fast-path function is that this can handle bit17 1197 * and uses non-atomic copy and kmap functions. 
*/ 1198 static int 1199 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 1200 char __user *user_data, 1201 bool page_do_bit17_swizzling, 1202 bool needs_clflush_before, 1203 bool needs_clflush_after) 1204 { 1205 char *vaddr; 1206 int ret; 1207 1208 vaddr = kmap(page); 1209 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 1210 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1211 page_length, 1212 page_do_bit17_swizzling); 1213 if (page_do_bit17_swizzling) 1214 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 1215 user_data, 1216 page_length); 1217 else 1218 ret = __copy_from_user(vaddr + shmem_page_offset, 1219 user_data, 1220 page_length); 1221 if (needs_clflush_after) 1222 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 1223 page_length, 1224 page_do_bit17_swizzling); 1225 kunmap(page); 1226 1227 return ret ? -EFAULT : 0; 1228 } 1229 1230 static int 1231 i915_gem_shmem_pwrite(struct drm_device *dev, 1232 struct drm_i915_gem_object *obj, 1233 struct drm_i915_gem_pwrite *args, 1234 struct drm_file *file) 1235 { 1236 ssize_t remain; 1237 loff_t offset; 1238 char __user *user_data; 1239 int shmem_page_offset, page_length, ret = 0; 1240 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 1241 int hit_slowpath = 0; 1242 int needs_clflush_after = 0; 1243 int needs_clflush_before = 0; 1244 struct sg_page_iter sg_iter; 1245 1246 user_data = u64_to_user_ptr(args->data_ptr); 1247 remain = args->size; 1248 1249 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 1250 1251 ret = i915_gem_object_wait_rendering(obj, false); 1252 if (ret) 1253 return ret; 1254 1255 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1256 /* If we're not in the cpu write domain, set ourself into the gtt 1257 * write domain and manually flush cachelines (if required). This 1258 * optimizes for the case when the gpu will use the data 1259 * right away and we therefore have to clflush anyway. */ 1260 needs_clflush_after = cpu_write_needs_clflush(obj); 1261 } 1262 /* Same trick applies to invalidate partially written cachelines read 1263 * before writing. */ 1264 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) 1265 needs_clflush_before = 1266 !cpu_cache_is_coherent(dev, obj->cache_level); 1267 1268 ret = i915_gem_object_get_pages(obj); 1269 if (ret) 1270 return ret; 1271 1272 intel_fb_obj_invalidate(obj, ORIGIN_CPU); 1273 1274 i915_gem_object_pin_pages(obj); 1275 1276 offset = args->offset; 1277 obj->dirty = 1; 1278 1279 VM_OBJECT_LOCK(obj->base.filp); 1280 vm_object_pip_add(obj->base.filp, 1); 1281 1282 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 1283 offset >> PAGE_SHIFT) { 1284 struct page *page = sg_page_iter_page(&sg_iter); 1285 int partial_cacheline_write; 1286 1287 if (remain <= 0) 1288 break; 1289 1290 /* Operation in this page 1291 * 1292 * shmem_page_offset = offset within page in shmem file 1293 * page_length = bytes to copy for this page 1294 */ 1295 shmem_page_offset = offset_in_page(offset); 1296 1297 page_length = remain; 1298 if ((shmem_page_offset + page_length) > PAGE_SIZE) 1299 page_length = PAGE_SIZE - shmem_page_offset; 1300 1301 /* If we don't overwrite a cacheline completely we need to be 1302 * careful to have up-to-date data by first clflushing. Don't 1303 * overcomplicate things and flush the entire patch. 
*/ 1304 partial_cacheline_write = needs_clflush_before && 1305 ((shmem_page_offset | page_length) 1306 & (boot_cpu_data.x86_clflush_size - 1)); 1307 1308 page_do_bit17_swizzling = obj_do_bit17_swizzling && 1309 (page_to_phys(page) & (1 << 17)) != 0; 1310 1311 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 1312 user_data, page_do_bit17_swizzling, 1313 partial_cacheline_write, 1314 needs_clflush_after); 1315 if (ret == 0) 1316 goto next_page; 1317 1318 hit_slowpath = 1; 1319 mutex_unlock(&dev->struct_mutex); 1320 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 1321 user_data, page_do_bit17_swizzling, 1322 partial_cacheline_write, 1323 needs_clflush_after); 1324 1325 mutex_lock(&dev->struct_mutex); 1326 1327 if (ret) 1328 goto out; 1329 1330 next_page: 1331 remain -= page_length; 1332 user_data += page_length; 1333 offset += page_length; 1334 } 1335 1336 out: 1337 vm_object_pip_wakeup(obj->base.filp); 1338 VM_OBJECT_UNLOCK(obj->base.filp); 1339 i915_gem_object_unpin_pages(obj); 1340 1341 if (hit_slowpath) { 1342 /* 1343 * Fixup: Flush cpu caches in case we didn't flush the dirty 1344 * cachelines in-line while writing and the object moved 1345 * out of the cpu write domain while we've dropped the lock. 1346 */ 1347 if (!needs_clflush_after && 1348 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1349 if (i915_gem_clflush_object(obj, obj->pin_display)) 1350 needs_clflush_after = true; 1351 } 1352 } 1353 1354 if (needs_clflush_after) 1355 i915_gem_chipset_flush(to_i915(dev)); 1356 else 1357 obj->cache_dirty = true; 1358 1359 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 1360 return ret; 1361 } 1362 1363 /** 1364 * Writes data to the object referenced by handle. 1365 * @dev: drm device 1366 * @data: ioctl data blob 1367 * @file: drm file 1368 * 1369 * On error, the contents of the buffer that were to be modified are undefined. 1370 */ 1371 int 1372 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1373 struct drm_file *file) 1374 { 1375 struct drm_i915_private *dev_priv = to_i915(dev); 1376 struct drm_i915_gem_pwrite *args = data; 1377 struct drm_i915_gem_object *obj; 1378 int ret; 1379 1380 if (args->size == 0) 1381 return 0; 1382 1383 #if 0 1384 if (!access_ok(VERIFY_READ, 1385 u64_to_user_ptr(args->data_ptr), 1386 args->size)) 1387 return -EFAULT; 1388 #endif 1389 1390 if (likely(!i915.prefault_disable)) { 1391 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr), 1392 args->size); 1393 if (ret) 1394 return -EFAULT; 1395 } 1396 1397 obj = i915_gem_object_lookup(file, args->handle); 1398 if (!obj) 1399 return -ENOENT; 1400 1401 /* Bounds check destination. */ 1402 if (args->offset > obj->base.size || 1403 args->size > obj->base.size - args->offset) { 1404 ret = -EINVAL; 1405 goto err; 1406 } 1407 1408 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1409 1410 ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); 1411 if (ret) 1412 goto err; 1413 1414 intel_runtime_pm_get(dev_priv); 1415 1416 ret = i915_mutex_lock_interruptible(dev); 1417 if (ret) 1418 goto err_rpm; 1419 1420 ret = -EFAULT; 1421 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1422 * it would end up going through the fenced access, and we'll get 1423 * different detiling behavior between reading and writing. 1424 * pread/pwrite currently are reading and writing from the CPU 1425 * perspective, requiring manual detiling by the client. 
1426 */ 1427 if (!i915_gem_object_has_struct_page(obj) || 1428 cpu_write_needs_clflush(obj)) { 1429 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); 1430 /* Note that the gtt paths might fail with non-page-backed user 1431 * pointers (e.g. gtt mappings when moving data between 1432 * textures). Fallback to the shmem path in that case. */ 1433 } 1434 1435 if (ret == -EFAULT || ret == -ENOSPC) { 1436 if (obj->phys_handle) 1437 ret = i915_gem_phys_pwrite(obj, args, file); 1438 else if (i915_gem_object_has_struct_page(obj)) 1439 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1440 else 1441 ret = -ENODEV; 1442 } 1443 1444 i915_gem_object_put(obj); 1445 mutex_unlock(&dev->struct_mutex); 1446 intel_runtime_pm_put(dev_priv); 1447 1448 return ret; 1449 1450 err_rpm: 1451 intel_runtime_pm_put(dev_priv); 1452 err: 1453 i915_gem_object_put_unlocked(obj); 1454 return ret; 1455 } 1456 1457 static enum fb_op_origin 1458 write_origin(struct drm_i915_gem_object *obj, unsigned domain) 1459 { 1460 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? 1461 ORIGIN_GTT : ORIGIN_CPU; 1462 } 1463 1464 /** 1465 * Called when user space prepares to use an object with the CPU, either 1466 * through the mmap ioctl's mapping or a GTT mapping. 1467 * @dev: drm device 1468 * @data: ioctl data blob 1469 * @file: drm file 1470 */ 1471 int 1472 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1473 struct drm_file *file) 1474 { 1475 struct drm_i915_gem_set_domain *args = data; 1476 struct drm_i915_gem_object *obj; 1477 uint32_t read_domains = args->read_domains; 1478 uint32_t write_domain = args->write_domain; 1479 int ret; 1480 1481 /* Only handle setting domains to types used by the CPU. */ 1482 if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) 1483 return -EINVAL; 1484 1485 /* Having something in the write domain implies it's in the read 1486 * domain, and only that read domain. Enforce that in the request. 1487 */ 1488 if (write_domain != 0 && read_domains != write_domain) 1489 return -EINVAL; 1490 1491 obj = i915_gem_object_lookup(file, args->handle); 1492 if (!obj) 1493 return -ENOENT; 1494 1495 /* Try to flush the object off the GPU without holding the lock. 1496 * We will repeat the flush holding the lock in the normal manner 1497 * to catch cases where we are gazumped. 
1498 */ 1499 ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); 1500 if (ret) 1501 goto err; 1502 1503 ret = i915_mutex_lock_interruptible(dev); 1504 if (ret) 1505 goto err; 1506 1507 if (read_domains & I915_GEM_DOMAIN_GTT) 1508 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1509 else 1510 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1511 1512 if (write_domain != 0) 1513 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); 1514 1515 i915_gem_object_put(obj); 1516 mutex_unlock(&dev->struct_mutex); 1517 return ret; 1518 1519 err: 1520 i915_gem_object_put_unlocked(obj); 1521 return ret; 1522 } 1523 1524 /** 1525 * Called when user space has done writes to this buffer 1526 * @dev: drm device 1527 * @data: ioctl data blob 1528 * @file: drm file 1529 */ 1530 int 1531 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1532 struct drm_file *file) 1533 { 1534 struct drm_i915_gem_sw_finish *args = data; 1535 struct drm_i915_gem_object *obj; 1536 int err = 0; 1537 1538 obj = i915_gem_object_lookup(file, args->handle); 1539 if (!obj) 1540 return -ENOENT; 1541 1542 /* Pinned buffers may be scanout, so flush the cache */ 1543 if (READ_ONCE(obj->pin_display)) { 1544 err = i915_mutex_lock_interruptible(dev); 1545 if (!err) { 1546 i915_gem_object_flush_cpu_write_domain(obj); 1547 mutex_unlock(&dev->struct_mutex); 1548 } 1549 } 1550 1551 i915_gem_object_put_unlocked(obj); 1552 return err; 1553 } 1554 1555 /** 1556 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address 1557 * it is mapped to. 1558 * @dev: drm device 1559 * @data: ioctl data blob 1560 * @file: drm file 1561 * 1562 * While the mapping holds a reference on the contents of the object, it doesn't 1563 * imply a ref on the object itself. 1564 * 1565 * IMPORTANT: 1566 * 1567 * DRM driver writers who look a this function as an example for how to do GEM 1568 * mmap support, please don't implement mmap support like here. The modern way 1569 * to implement DRM mmap support is with an mmap offset ioctl (like 1570 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly. 1571 * That way debug tooling like valgrind will understand what's going on, hiding 1572 * the mmap call in a driver private ioctl will break that. The i915 driver only 1573 * does cpu mmaps this way because we didn't know better. 1574 */ 1575 int 1576 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1577 struct drm_file *file) 1578 { 1579 struct drm_i915_gem_mmap *args = data; 1580 struct drm_i915_gem_object *obj; 1581 unsigned long addr; 1582 1583 struct proc *p = curproc; 1584 vm_map_t map = &p->p_vmspace->vm_map; 1585 vm_size_t size; 1586 int error = 0, rv; 1587 1588 if (args->flags & ~(I915_MMAP_WC)) 1589 return -EINVAL; 1590 1591 #if 0 1592 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT)) 1593 return -ENODEV; 1594 #endif 1595 1596 obj = i915_gem_object_lookup(file, args->handle); 1597 if (!obj) 1598 return -ENOENT; 1599 1600 /* prime objects have no backing filp to GEM mmap 1601 * pages from. 1602 */ 1603 if (!obj->base.filp) { 1604 i915_gem_object_put_unlocked(obj); 1605 return -EINVAL; 1606 } 1607 1608 if (args->size == 0) 1609 goto out; 1610 1611 size = round_page(args->size); 1612 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1613 error = -ENOMEM; 1614 goto out; 1615 } 1616 1617 /* 1618 * Call hint to ensure that NULL is not returned as a valid address 1619 * and to reduce vm_map traversals. 
XXX causes instability, use a 1620 * fixed low address as the start point instead to avoid the NULL 1621 * return issue. 1622 */ 1623 addr = PAGE_SIZE; 1624 1625 /* 1626 * Use 256KB alignment. It is unclear why this matters for a 1627 * virtual address but it appears to fix a number of application/X 1628 * crashes and kms console switching is much faster. 1629 */ 1630 vm_object_hold(obj->base.filp); 1631 vm_object_reference_locked(obj->base.filp); 1632 vm_object_drop(obj->base.filp); 1633 1634 /* Something gets wrong here: fails to mmap 4096 */ 1635 rv = vm_map_find(map, obj->base.filp, NULL, 1636 args->offset, &addr, args->size, 1637 256 * 1024, /* align */ 1638 TRUE, /* fitit */ 1639 VM_MAPTYPE_NORMAL, VM_SUBSYS_DRM_GEM, 1640 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1641 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1642 MAP_SHARED /* cow */); 1643 if (rv != KERN_SUCCESS) { 1644 vm_object_deallocate(obj->base.filp); 1645 error = -vm_mmap_to_errno(rv); 1646 } else { 1647 args->addr_ptr = (uint64_t)addr; 1648 } 1649 1650 if (args->flags & I915_MMAP_WC) { /* I915_PARAM_MMAP_VERSION */ 1651 #if 0 1652 struct mm_struct *mm = current->mm; 1653 struct vm_area_struct *vma; 1654 1655 if (down_write_killable(&mm->mmap_sem)) { 1656 i915_gem_object_put_unlocked(obj); 1657 return -EINTR; 1658 } 1659 vma = find_vma(mm, addr); 1660 if (vma) 1661 vma->vm_page_prot = 1662 pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 1663 else 1664 addr = -ENOMEM; 1665 up_write(&mm->mmap_sem); 1666 1667 /* This may race, but that's ok, it only gets set */ 1668 WRITE_ONCE(obj->has_wc_mmap, true); 1669 #endif 1670 } 1671 1672 out: 1673 i915_gem_object_put_unlocked(obj); 1674 if (error != 0) 1675 return error; 1676 1677 args->addr_ptr = (uint64_t) addr; 1678 1679 return 0; 1680 } 1681 1682 /** 1683 * i915_gem_fault - fault a page into the GTT 1684 * 1685 * vm_obj is locked on entry and expected to be locked on return. 1686 * 1687 * The vm_pager has placemarked the object with an anonymous memory page 1688 * which we must replace atomically to avoid races against concurrent faults 1689 * on the same page. XXX we currently are unable to do this atomically. 1690 * 1691 * If we are to return an error we should not touch the anonymous page, 1692 * the caller will deallocate it. 1693 * 1694 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1695 * in that case. Release all resources and wait 1 tick before retrying. 1696 * This is a huge problem which needs to be fixed by getting rid of most 1697 * of the interruptability. The linux code does not retry but does appear 1698 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1699 * to be able to retry. 1700 * 1701 * -- 1702 * @vma: VMA in question 1703 * @vmf: fault info 1704 * 1705 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1706 * from userspace. The fault handler takes care of binding the object to 1707 * the GTT (if needed), allocating and programming a fence register (again, 1708 * only if needed based on whether the old reg is still valid or the object 1709 * is tiled) and inserting a new PTE into the faulting process. 1710 * 1711 * Note that the faulting process may involve evicting existing objects 1712 * from the GTT and/or fence registers to make room. So performance may 1713 * suffer if the GTT working set is large or there are few fence registers 1714 * left. 1715 * 1716 * vm_obj is locked on entry and expected to be locked on return. 
The VM 1717 * pager has placed an anonymous memory page at (obj,offset) which we have 1718 * to replace. 1719 */ 1720 int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) 1721 { 1722 struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle); 1723 struct drm_device *dev = obj->base.dev; 1724 struct drm_i915_private *dev_priv = to_i915(dev); 1725 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1726 struct i915_ggtt_view view = i915_ggtt_view_normal; 1727 bool write = !!(prot & VM_PROT_WRITE); 1728 unsigned long page_offset; 1729 vm_page_t m; 1730 int ret; 1731 1732 /* We don't use vmf->pgoff since that has the fake offset */ 1733 page_offset = (unsigned long)offset; 1734 1735 /* 1736 * vm_fault() has supplied us with a busied page placeholding 1737 * the operation. This presents a lock order reversal issue 1738 * again i915_gem_release_mmap() for our device mutex. 1739 * 1740 * Deal with the problem by getting rid of the placeholder now, 1741 * and then dealing with the potential for a new placeholder when 1742 * we try to insert later. 1743 */ 1744 if (*mres != NULL) { 1745 m = *mres; 1746 *mres = NULL; 1747 if ((m->busy_count & PBUSY_LOCKED) == 0) 1748 kprintf("i915_gem_fault: Page was not busy\n"); 1749 else 1750 vm_page_remove(m); 1751 vm_page_free(m); 1752 } 1753 1754 m = NULL; 1755 1756 retry: 1757 trace_i915_gem_object_fault(obj, page_offset, true, write); 1758 1759 /* Try to flush the object off the GPU first without holding the lock. 1760 * Upon acquiring the lock, we will perform our sanity checks and then 1761 * repeat the flush holding the lock in the normal manner to catch cases 1762 * where we are gazumped. 1763 */ 1764 ret = __unsafe_wait_rendering(obj, NULL, !write); 1765 if (ret) 1766 goto err; 1767 1768 intel_runtime_pm_get(dev_priv); 1769 1770 ret = i915_mutex_lock_interruptible(dev); 1771 if (ret) 1772 goto err_rpm; 1773 1774 /* Access to snoopable pages through the GTT is incoherent. */ 1775 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 1776 ret = -EFAULT; 1777 goto err_unlock; 1778 } 1779 1780 /* Use a partial view if the object is bigger than the aperture. */ 1781 if (obj->base.size >= ggtt->mappable_end && 1782 !i915_gem_object_is_tiled(obj)) { 1783 #if 0 1784 static const unsigned int chunk_size = 256; // 1 MiB 1785 1786 memset(&view, 0, sizeof(view)); 1787 view.type = I915_GGTT_VIEW_PARTIAL; 1788 view.params.partial.offset = rounddown(page_offset, chunk_size); 1789 view.params.partial.size = 1790 min_t(unsigned int, 1791 chunk_size, 1792 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 1793 view.params.partial.offset); 1794 #endif 1795 } 1796 1797 /* Now pin it into the GTT if needed */ 1798 ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 1799 if (ret) 1800 goto err_unlock; 1801 1802 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1803 if (ret) 1804 goto err_unpin; 1805 1806 ret = i915_gem_object_get_fence(obj); 1807 if (ret) 1808 goto err_unpin; 1809 1810 /* 1811 * START FREEBSD MAGIC 1812 * 1813 * Add a pip count to avoid destruction and certain other 1814 * complex operations (such as collapses?) while unlocked. 1815 */ 1816 vm_object_pip_add(vm_obj, 1); 1817 1818 ret = 0; 1819 m = NULL; 1820 1821 /* 1822 * Since the object lock was dropped, another thread might have 1823 * faulted on the same GTT address and instantiated the mapping. 1824 * Recheck. 1825 */ 1826 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 1827 if (m != NULL) { 1828 /* 1829 * Try to busy the page, retry on failure (non-zero ret). 
1830 */ 1831 if (vm_page_busy_try(m, false)) { 1832 kprintf("i915_gem_fault: BUSY\n"); 1833 ret = -EINTR; 1834 goto err_unlock; 1835 } 1836 goto have_page; 1837 } 1838 /* END FREEBSD MAGIC */ 1839 1840 obj->fault_mappable = true; 1841 1842 /* Finally, remap it using the new GTT offset */ 1843 m = vm_phys_fictitious_to_vm_page(ggtt->mappable_base + 1844 i915_gem_obj_ggtt_offset_view(obj, &view) + offset); 1845 if (m == NULL) { 1846 ret = -EFAULT; 1847 goto err_unpin; 1848 } 1849 KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m)); 1850 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 1851 1852 /* 1853 * Try to busy the page. Fails on non-zero return. 1854 */ 1855 if (vm_page_busy_try(m, false)) { 1856 kprintf("i915_gem_fault: BUSY(2)\n"); 1857 ret = -EINTR; 1858 goto err_unpin; 1859 } 1860 m->valid = VM_PAGE_BITS_ALL; 1861 1862 #if 1 1863 /* 1864 * This should always work since we already checked via a lookup 1865 * above. 1866 */ 1867 if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)) == FALSE) { 1868 kprintf("i915:gem_fault: page %p,%jd already in object\n", 1869 vm_obj, 1870 OFF_TO_IDX(offset)); 1871 vm_page_wakeup(m); 1872 ret = -EINTR; 1873 goto err_unpin; 1874 } 1875 #else 1876 /* NOT COMPILED ATM */ 1877 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 1878 /* Overriding existing pages in partial view does not cause 1879 * us any trouble as TLBs are still valid because the fault 1880 * is due to userspace losing part of the mapping or never 1881 * having accessed it before (at this partials' range). 1882 */ 1883 unsigned long base = vma->vm_start + 1884 (view.params.partial.offset << PAGE_SHIFT); 1885 unsigned int i; 1886 1887 for (i = 0; i < view.params.partial.size; i++) { 1888 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 1889 if (ret) 1890 break; 1891 } 1892 1893 obj->fault_mappable = true; 1894 } else { 1895 if (!obj->fault_mappable) { 1896 unsigned long size = min_t(unsigned long, 1897 vma->vm_end - vma->vm_start, 1898 obj->base.size); 1899 int i; 1900 1901 for (i = 0; i < size >> PAGE_SHIFT; i++) { 1902 ret = vm_insert_pfn(vma, 1903 (unsigned long)vma->vm_start + i * PAGE_SIZE, 1904 pfn + i); 1905 if (ret) 1906 break; 1907 } 1908 1909 obj->fault_mappable = true; 1910 } else 1911 ret = vm_insert_pfn(vma, 1912 (unsigned long)vmf->virtual_address, 1913 pfn + page_offset); 1914 } 1915 #endif 1916 1917 have_page: 1918 *mres = m; 1919 1920 i915_gem_object_ggtt_unpin_view(obj, &view); 1921 mutex_unlock(&dev->struct_mutex); 1922 ret = VM_PAGER_OK; 1923 goto done; 1924 1925 /* 1926 * ALTERNATIVE ERROR RETURN. 1927 * 1928 * OBJECT EXPECTED TO BE LOCKED. 1929 */ 1930 err_unpin: 1931 i915_gem_object_ggtt_unpin_view(obj, &view); 1932 err_unlock: 1933 mutex_unlock(&dev->struct_mutex); 1934 err_rpm: 1935 intel_runtime_pm_put(dev_priv); 1936 err: 1937 switch (ret) { 1938 case -EIO: 1939 /* 1940 * We eat errors when the gpu is terminally wedged to avoid 1941 * userspace unduly crashing (gl has no provisions for mmaps to 1942 * fail). But any other -EIO isn't ours (e.g. swap in failure) 1943 * and so needs to be reported. 1944 */ 1945 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 1946 // ret = VM_FAULT_SIGBUS; 1947 break; 1948 } 1949 case -EAGAIN: 1950 /* 1951 * EAGAIN means the gpu is hung and we'll wait for the error 1952 * handler to reset everything when re-faulting in 1953 * i915_mutex_lock_interruptible. 
		 */
	case -ERESTARTSYS:
	case -EINTR:
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

done:
	vm_object_pip_wakeup(vm_obj);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	vm_object_t devobj;
	vm_page_t m;
	int i, page_count;

	/* Serialisation between user GTT access and our code depends upon
	 * revoking the CPU's PTE whilst the mutex is held. The next user
	 * pagefault then has to wait until we release the mutex.
	 */
	lockdep_assert_held(&obj->base.dev->struct_mutex);

	if (!obj->fault_mappable)
		return;

	devobj = cdev_pager_lookup(obj);
	if (devobj != NULL) {
		page_count = OFF_TO_IDX(obj->base.size);

		VM_OBJECT_LOCK(devobj);
		for (i = 0; i < page_count; i++) {
			m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm");
			if (m == NULL)
				continue;
			cdev_pager_free_page(devobj, m);
		}
		VM_OBJECT_UNLOCK(devobj);
		vm_object_deallocate(devobj);
	}

	/* Ensure that the CPU's PTE are revoked and there are not outstanding
	 * memory transactions from userspace before we return. The TLB
	 * flushing implied by changing the PTE above *should* be
	 * sufficient, an extra barrier here just provides us with a bit
	 * of paranoid documentation about our requirement to serialise
	 * memory writes before touching registers / GSM.
	 */
	wmb();

	obj->fault_mappable = false;
}

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
		i915_gem_release_mmap(obj);
}

/**
 * i915_gem_get_ggtt_size - return required global GTT size for an object
 * @dev_priv: i915 device
 * @size: object size
 * @tiling_mode: tiling mode
 *
 * Return the required global GTT size for an object, taking into account
 * potential fence register mapping.
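 *
 * For example, a 1.5 MiB tiled object on gen3 is rounded up to a 2 MiB
 * fence region (power-of-two, 1 MiB minimum); on gen4+ or for untiled
 * objects the size is returned unchanged.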
2048 */ 2049 u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, 2050 u64 size, int tiling_mode) 2051 { 2052 u64 ggtt_size; 2053 2054 GEM_BUG_ON(size == 0); 2055 2056 if (INTEL_GEN(dev_priv) >= 4 || 2057 tiling_mode == I915_TILING_NONE) 2058 return size; 2059 2060 /* Previous chips need a power-of-two fence region when tiling */ 2061 if (IS_GEN3(dev_priv)) 2062 ggtt_size = 1024*1024; 2063 else 2064 ggtt_size = 512*1024; 2065 2066 while (ggtt_size < size) 2067 ggtt_size <<= 1; 2068 2069 return ggtt_size; 2070 } 2071 2072 /** 2073 * i915_gem_get_ggtt_alignment - return required global GTT alignment 2074 * @dev_priv: i915 device 2075 * @size: object size 2076 * @tiling_mode: tiling mode 2077 * @fenced: is fenced alignment required or not 2078 * 2079 * Return the required global GTT alignment for an object, taking into account 2080 * potential fence register mapping. 2081 */ 2082 u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, 2083 int tiling_mode, bool fenced) 2084 { 2085 GEM_BUG_ON(size == 0); 2086 2087 /* 2088 * Minimum alignment is 4k (GTT page size), but might be greater 2089 * if a fence register is needed for the object. 2090 */ 2091 if (INTEL_GEN(dev_priv) >= 4 || (!fenced && IS_G33(dev_priv)) || 2092 tiling_mode == I915_TILING_NONE) 2093 return 4096; 2094 2095 /* 2096 * Previous chips need to be aligned to the size of the smallest 2097 * fence register that can contain the object. 2098 */ 2099 return i915_gem_get_ggtt_size(dev_priv, size, tiling_mode); 2100 } 2101 2102 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2103 { 2104 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2105 int err; 2106 2107 err = drm_gem_create_mmap_offset(&obj->base); 2108 if (!err) 2109 return 0; 2110 2111 /* We can idle the GPU locklessly to flush stale objects, but in order 2112 * to claim that space for ourselves, we need to take the big 2113 * struct_mutex to free the requests+objects and allocate our slot. 2114 */ 2115 err = i915_gem_wait_for_idle(dev_priv, true); 2116 if (err) 2117 return err; 2118 2119 err = i915_mutex_lock_interruptible(&dev_priv->drm); 2120 if (!err) { 2121 i915_gem_retire_requests(dev_priv); 2122 err = drm_gem_create_mmap_offset(&obj->base); 2123 mutex_unlock(&dev_priv->drm.struct_mutex); 2124 } 2125 2126 return err; 2127 } 2128 2129 #if 0 2130 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2131 { 2132 drm_gem_free_mmap_offset(&obj->base); 2133 } 2134 #endif 2135 2136 int 2137 i915_gem_mmap_gtt(struct drm_file *file, 2138 struct drm_device *dev, 2139 uint32_t handle, 2140 uint64_t *offset) 2141 { 2142 struct drm_i915_gem_object *obj; 2143 int ret; 2144 2145 obj = i915_gem_object_lookup(file, handle); 2146 if (!obj) 2147 return -ENOENT; 2148 2149 ret = i915_gem_object_create_mmap_offset(obj); 2150 if (ret == 0) 2151 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 2152 DRM_GEM_MAPPING_KEY; 2153 2154 i915_gem_object_put_unlocked(obj); 2155 return ret; 2156 } 2157 2158 /** 2159 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2160 * @dev: DRM device 2161 * @data: GTT mapping ioctl data 2162 * @file: GEM object info 2163 * 2164 * Simply returns the fake offset to userspace so it can mmap it. 2165 * The mmap call will end up in drm_gem_mmap(), which will set things 2166 * up so we can get faults in the handler above. 
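 *
 * A rough sketch of the corresponding userspace side, for illustration
 * only (drmIoctl() and the struct layout come from the libdrm/uapi
 * headers, not from this file):
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			   MAP_SHARED, fd, arg.offset);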
2167 * 2168 * The fault handler will take care of binding the object into the GTT 2169 * (since it may have been evicted to make room for something), allocating 2170 * a fence register, and mapping the appropriate aperture address into 2171 * userspace. 2172 */ 2173 int 2174 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2175 struct drm_file *file) 2176 { 2177 struct drm_i915_gem_mmap_gtt *args = data; 2178 2179 return i915_gem_mmap_gtt(file, dev, args->handle, (uint64_t *)&args->offset); 2180 } 2181 2182 /* Immediately discard the backing storage */ 2183 static void 2184 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2185 { 2186 vm_object_t vm_obj = obj->base.filp; 2187 2188 if (obj->base.filp == NULL) 2189 return; 2190 2191 VM_OBJECT_LOCK(vm_obj); 2192 vm_object_page_remove(vm_obj, 0, 0, false); 2193 VM_OBJECT_UNLOCK(vm_obj); 2194 2195 /* Our goal here is to return as much of the memory as 2196 * is possible back to the system as we are called from OOM. 2197 * To do this we must instruct the shmfs to drop all of its 2198 * backing pages, *now*. 2199 */ 2200 #if 0 2201 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2202 #endif 2203 obj->madv = __I915_MADV_PURGED; 2204 } 2205 2206 /* Try to discard unwanted pages */ 2207 static void 2208 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2209 { 2210 #if 0 2211 struct address_space *mapping; 2212 #endif 2213 2214 switch (obj->madv) { 2215 case I915_MADV_DONTNEED: 2216 i915_gem_object_truncate(obj); 2217 case __I915_MADV_PURGED: 2218 return; 2219 } 2220 2221 if (obj->base.filp == NULL) 2222 return; 2223 2224 #if 0 2225 mapping = file_inode(obj->base.filp)->i_mapping, 2226 #endif 2227 invalidate_mapping_pages(obj->base.filp, 0, (loff_t)-1); 2228 } 2229 2230 static void 2231 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2232 { 2233 struct sgt_iter sgt_iter; 2234 struct page *page; 2235 int ret; 2236 2237 BUG_ON(obj->madv == __I915_MADV_PURGED); 2238 2239 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2240 if (WARN_ON(ret)) { 2241 /* In the event of a disaster, abandon all caches and 2242 * hope for the best. 2243 */ 2244 i915_gem_clflush_object(obj, true); 2245 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2246 } 2247 2248 i915_gem_gtt_finish_object(obj); 2249 2250 if (i915_gem_object_needs_bit17_swizzle(obj)) 2251 i915_gem_object_save_bit_17_swizzle(obj); 2252 2253 if (obj->madv == I915_MADV_DONTNEED) 2254 obj->dirty = 0; 2255 2256 for_each_sgt_page(page, sgt_iter, obj->pages) { 2257 if (obj->dirty) 2258 set_page_dirty(page); 2259 2260 if (obj->madv == I915_MADV_WILLNEED) 2261 mark_page_accessed(page); 2262 2263 vm_page_busy_wait((struct vm_page *)page, FALSE, "i915gem"); 2264 vm_page_unwire((struct vm_page *)page, 1); 2265 vm_page_wakeup((struct vm_page *)page); 2266 } 2267 obj->dirty = 0; 2268 2269 sg_free_table(obj->pages); 2270 kfree(obj->pages); 2271 } 2272 2273 int 2274 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2275 { 2276 const struct drm_i915_gem_object_ops *ops = obj->ops; 2277 2278 if (obj->pages == NULL) 2279 return 0; 2280 2281 if (obj->pages_pin_count) 2282 return -EBUSY; 2283 2284 GEM_BUG_ON(obj->bind_count); 2285 2286 /* ->put_pages might need to allocate memory for the bit17 swizzle 2287 * array, hence protect them from being reaped by removing them from gtt 2288 * lists early. 
*/ 2289 list_del(&obj->global_list); 2290 2291 if (obj->mapping) { 2292 if (is_vmalloc_addr(obj->mapping)) 2293 vunmap(obj->mapping); 2294 else 2295 kunmap(kmap_to_page(obj->mapping)); 2296 obj->mapping = NULL; 2297 } 2298 2299 ops->put_pages(obj); 2300 obj->pages = NULL; 2301 2302 i915_gem_object_invalidate(obj); 2303 2304 return 0; 2305 } 2306 2307 static int 2308 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2309 { 2310 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2311 int page_count, i; 2312 vm_object_t vm_obj; 2313 struct sg_table *st; 2314 struct scatterlist *sg; 2315 struct sgt_iter sgt_iter; 2316 struct page *page; 2317 unsigned long last_pfn = 0; /* suppress gcc warning */ 2318 int ret; 2319 2320 /* Assert that the object is not currently in any GPU domain. As it 2321 * wasn't in the GTT, there shouldn't be any way it could have been in 2322 * a GPU cache 2323 */ 2324 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2325 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2326 2327 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 2328 if (st == NULL) 2329 return -ENOMEM; 2330 2331 page_count = obj->base.size / PAGE_SIZE; 2332 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2333 kfree(st); 2334 return -ENOMEM; 2335 } 2336 2337 /* Get the list of pages out of our struct file. They'll be pinned 2338 * at this point until we release them. 2339 * 2340 * Fail silently without starting the shrinker 2341 */ 2342 vm_obj = obj->base.filp; 2343 VM_OBJECT_LOCK(vm_obj); 2344 sg = st->sgl; 2345 st->nents = 0; 2346 for (i = 0; i < page_count; i++) { 2347 page = shmem_read_mapping_page(vm_obj, i); 2348 if (IS_ERR(page)) { 2349 i915_gem_shrink(dev_priv, 2350 page_count, 2351 I915_SHRINK_BOUND | 2352 I915_SHRINK_UNBOUND | 2353 I915_SHRINK_PURGEABLE); 2354 page = shmem_read_mapping_page(vm_obj, i); 2355 } 2356 if (IS_ERR(page)) { 2357 /* We've tried hard to allocate the memory by reaping 2358 * our own buffer, now let the real VM do its job and 2359 * go down in flames if truly OOM. 2360 */ 2361 i915_gem_shrink_all(dev_priv); 2362 page = shmem_read_mapping_page(vm_obj, i); 2363 if (IS_ERR(page)) { 2364 ret = PTR_ERR(page); 2365 goto err_pages; 2366 } 2367 } 2368 #ifdef CONFIG_SWIOTLB 2369 if (swiotlb_nr_tbl()) { 2370 st->nents++; 2371 sg_set_page(sg, page, PAGE_SIZE, 0); 2372 sg = sg_next(sg); 2373 continue; 2374 } 2375 #endif 2376 if (!i || page_to_pfn(page) != last_pfn + 1) { 2377 if (i) 2378 sg = sg_next(sg); 2379 st->nents++; 2380 sg_set_page(sg, page, PAGE_SIZE, 0); 2381 } else { 2382 sg->length += PAGE_SIZE; 2383 } 2384 last_pfn = page_to_pfn(page); 2385 2386 /* Check that the i965g/gm workaround works. 
*/ 2387 } 2388 #ifdef CONFIG_SWIOTLB 2389 if (!swiotlb_nr_tbl()) 2390 #endif 2391 sg_mark_end(sg); 2392 obj->pages = st; 2393 VM_OBJECT_UNLOCK(vm_obj); 2394 2395 ret = i915_gem_gtt_prepare_object(obj); 2396 if (ret) 2397 goto err_pages; 2398 2399 if (i915_gem_object_needs_bit17_swizzle(obj)) 2400 i915_gem_object_do_bit_17_swizzle(obj); 2401 2402 if (i915_gem_object_is_tiled(obj) && 2403 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2404 i915_gem_object_pin_pages(obj); 2405 2406 return 0; 2407 2408 err_pages: 2409 sg_mark_end(sg); 2410 for_each_sgt_page(page, sgt_iter, st) 2411 { 2412 struct vm_page *vmp = (struct vm_page *)page; 2413 vm_page_busy_wait(vmp, FALSE, "i915gem"); 2414 vm_page_unwire(vmp, 0); 2415 vm_page_wakeup(vmp); 2416 } 2417 VM_OBJECT_UNLOCK(vm_obj); 2418 sg_free_table(st); 2419 kfree(st); 2420 2421 /* shmemfs first checks if there is enough memory to allocate the page 2422 * and reports ENOSPC should there be insufficient, along with the usual 2423 * ENOMEM for a genuine allocation failure. 2424 * 2425 * We use ENOSPC in our driver to mean that we have run out of aperture 2426 * space and so want to translate the error from shmemfs back to our 2427 * usual understanding of ENOMEM. 2428 */ 2429 if (ret == -ENOSPC) 2430 ret = -ENOMEM; 2431 2432 return ret; 2433 } 2434 2435 /* Ensure that the associated pages are gathered from the backing storage 2436 * and pinned into our object. i915_gem_object_get_pages() may be called 2437 * multiple times before they are released by a single call to 2438 * i915_gem_object_put_pages() - once the pages are no longer referenced 2439 * either as a result of memory pressure (reaping pages under the shrinker) 2440 * or as the object is itself released. 2441 */ 2442 int 2443 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2444 { 2445 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2446 const struct drm_i915_gem_object_ops *ops = obj->ops; 2447 int ret; 2448 2449 if (obj->pages) 2450 return 0; 2451 2452 if (obj->madv != I915_MADV_WILLNEED) { 2453 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2454 return -EFAULT; 2455 } 2456 2457 BUG_ON(obj->pages_pin_count); 2458 2459 ret = ops->get_pages(obj); 2460 if (ret) 2461 return ret; 2462 2463 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2464 2465 obj->get_page.sg = obj->pages->sgl; 2466 obj->get_page.last = 0; 2467 2468 return 0; 2469 } 2470 2471 /* The 'mapping' part of i915_gem_object_pin_map() below */ 2472 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) 2473 { 2474 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 2475 struct sg_table *sgt = obj->pages; 2476 struct sgt_iter sgt_iter; 2477 struct page *page; 2478 struct page *stack_pages[32]; 2479 struct page **pages = stack_pages; 2480 unsigned long i = 0; 2481 void *addr; 2482 2483 /* A single page can always be kmapped */ 2484 if (n_pages == 1) 2485 return kmap(sg_page(sgt->sgl)); 2486 2487 if (n_pages > ARRAY_SIZE(stack_pages)) { 2488 /* Too big for stack -- allocate temporary array instead */ 2489 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); 2490 if (!pages) 2491 return NULL; 2492 } 2493 2494 for_each_sgt_page(page, sgt_iter, sgt) 2495 pages[i++] = page; 2496 2497 /* Check that we have the expected number of pages */ 2498 GEM_BUG_ON(i != n_pages); 2499 2500 addr = vmap(pages, n_pages, 0, PAGE_KERNEL); 2501 2502 if (pages != stack_pages) 2503 drm_free_large(pages); 2504 2505 return addr; 2506 } 2507 2508 /* get, pin, and map the pages of the object 
into kernel space */ 2509 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) 2510 { 2511 int ret; 2512 2513 lockdep_assert_held(&obj->base.dev->struct_mutex); 2514 2515 ret = i915_gem_object_get_pages(obj); 2516 if (ret) 2517 return ERR_PTR(ret); 2518 2519 i915_gem_object_pin_pages(obj); 2520 2521 if (!obj->mapping) { 2522 obj->mapping = i915_gem_object_map(obj); 2523 if (!obj->mapping) { 2524 i915_gem_object_unpin_pages(obj); 2525 return ERR_PTR(-ENOMEM); 2526 } 2527 } 2528 2529 return obj->mapping; 2530 } 2531 2532 static void 2533 i915_gem_object_retire__write(struct i915_gem_active *active, 2534 struct drm_i915_gem_request *request) 2535 { 2536 struct drm_i915_gem_object *obj = 2537 container_of(active, struct drm_i915_gem_object, last_write); 2538 2539 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2540 } 2541 2542 static void 2543 i915_gem_object_retire__read(struct i915_gem_active *active, 2544 struct drm_i915_gem_request *request) 2545 { 2546 int idx = request->engine->id; 2547 struct drm_i915_gem_object *obj = 2548 container_of(active, struct drm_i915_gem_object, last_read[idx]); 2549 2550 GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); 2551 2552 i915_gem_object_clear_active(obj, idx); 2553 if (i915_gem_object_is_active(obj)) 2554 return; 2555 2556 /* Bump our place on the bound list to keep it roughly in LRU order 2557 * so that we don't steal from recently used but inactive objects 2558 * (unless we are forced to ofc!) 2559 */ 2560 if (obj->bind_count) 2561 list_move_tail(&obj->global_list, 2562 &request->i915->mm.bound_list); 2563 2564 i915_gem_object_put(obj); 2565 } 2566 2567 static bool i915_context_is_banned(const struct i915_gem_context *ctx) 2568 { 2569 unsigned long elapsed; 2570 2571 if (ctx->hang_stats.banned) 2572 return true; 2573 2574 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2575 if (ctx->hang_stats.ban_period_seconds && 2576 elapsed <= ctx->hang_stats.ban_period_seconds) { 2577 DRM_DEBUG("context hanging too fast, banning!\n"); 2578 return true; 2579 } 2580 2581 return false; 2582 } 2583 2584 static void i915_set_reset_status(struct i915_gem_context *ctx, 2585 const bool guilty) 2586 { 2587 struct i915_ctx_hang_stats *hs = &ctx->hang_stats; 2588 2589 if (guilty) { 2590 hs->banned = i915_context_is_banned(ctx); 2591 hs->batch_active++; 2592 hs->guilty_ts = get_seconds(); 2593 } else { 2594 hs->batch_pending++; 2595 } 2596 } 2597 2598 struct drm_i915_gem_request * 2599 i915_gem_find_active_request(struct intel_engine_cs *engine) 2600 { 2601 struct drm_i915_gem_request *request; 2602 2603 /* We are called by the error capture and reset at a random 2604 * point in time. In particular, note that neither is crucially 2605 * ordered with an interrupt. After a hang, the GPU is dead and we 2606 * assume that no more writes can happen (we waited long enough for 2607 * all writes that were in transaction to be flushed) - adding an 2608 * extra delay for a recent interrupt is pointless. Hence, we do 2609 * not need an engine->irq_seqno_barrier() before the seqno reads. 
2610 */ 2611 list_for_each_entry(request, &engine->request_list, link) { 2612 if (i915_gem_request_completed(request)) 2613 continue; 2614 2615 return request; 2616 } 2617 2618 return NULL; 2619 } 2620 2621 static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) 2622 { 2623 struct drm_i915_gem_request *request; 2624 bool ring_hung; 2625 2626 request = i915_gem_find_active_request(engine); 2627 if (request == NULL) 2628 return; 2629 2630 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2631 2632 i915_set_reset_status(request->ctx, ring_hung); 2633 list_for_each_entry_continue(request, &engine->request_list, link) 2634 i915_set_reset_status(request->ctx, false); 2635 } 2636 2637 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) 2638 { 2639 struct drm_i915_gem_request *request; 2640 struct intel_ring *ring; 2641 2642 /* Mark all pending requests as complete so that any concurrent 2643 * (lockless) lookup doesn't try and wait upon the request as we 2644 * reset it. 2645 */ 2646 intel_engine_init_seqno(engine, engine->last_submitted_seqno); 2647 2648 /* 2649 * Clear the execlists queue up before freeing the requests, as those 2650 * are the ones that keep the context and ringbuffer backing objects 2651 * pinned in place. 2652 */ 2653 2654 if (i915.enable_execlists) { 2655 /* Ensure irq handler finishes or is cancelled. */ 2656 tasklet_kill(&engine->irq_tasklet); 2657 2658 intel_execlists_cancel_requests(engine); 2659 } 2660 2661 /* 2662 * We must free the requests after all the corresponding objects have 2663 * been moved off active lists. Which is the same order as the normal 2664 * retire_requests function does. This is important if object hold 2665 * implicit references on things like e.g. ppgtt address spaces through 2666 * the request. 2667 */ 2668 request = i915_gem_active_raw(&engine->last_request, 2669 &engine->i915->drm.struct_mutex); 2670 if (request) 2671 i915_gem_request_retire_upto(request); 2672 GEM_BUG_ON(intel_engine_is_active(engine)); 2673 2674 /* Having flushed all requests from all queues, we know that all 2675 * ringbuffers must now be empty. However, since we do not reclaim 2676 * all space when retiring the request (to prevent HEADs colliding 2677 * with rapid ringbuffer wraparound) the amount of available space 2678 * upon reset is less than when we start. Do one more pass over 2679 * all the ringbuffers to reset last_retired_head. 2680 */ 2681 list_for_each_entry(ring, &engine->buffers, link) { 2682 ring->last_retired_head = ring->tail; 2683 intel_ring_update_space(ring); 2684 } 2685 2686 engine->i915->gt.active_engines &= ~intel_engine_flag(engine); 2687 } 2688 2689 void i915_gem_reset(struct drm_device *dev) 2690 { 2691 struct drm_i915_private *dev_priv = to_i915(dev); 2692 struct intel_engine_cs *engine; 2693 2694 /* 2695 * Before we free the objects from the requests, we need to inspect 2696 * them for finding the guilty party. As the requests only borrow 2697 * their reference to the objects, the inspection must be done first. 
2698 */ 2699 for_each_engine(engine, dev_priv) 2700 i915_gem_reset_engine_status(engine); 2701 2702 for_each_engine(engine, dev_priv) 2703 i915_gem_reset_engine_cleanup(engine); 2704 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); 2705 2706 i915_gem_context_reset(dev); 2707 2708 i915_gem_restore_fences(dev); 2709 } 2710 2711 static void 2712 i915_gem_retire_work_handler(struct work_struct *work) 2713 { 2714 struct drm_i915_private *dev_priv = 2715 container_of(work, typeof(*dev_priv), gt.retire_work.work); 2716 struct drm_device *dev = &dev_priv->drm; 2717 2718 /* Come back later if the device is busy... */ 2719 if (mutex_trylock(&dev->struct_mutex)) { 2720 i915_gem_retire_requests(dev_priv); 2721 mutex_unlock(&dev->struct_mutex); 2722 } 2723 2724 /* Keep the retire handler running until we are finally idle. 2725 * We do not need to do this test under locking as in the worst-case 2726 * we queue the retire worker once too often. 2727 */ 2728 if (READ_ONCE(dev_priv->gt.awake)) { 2729 i915_queue_hangcheck(dev_priv); 2730 queue_delayed_work(dev_priv->wq, 2731 &dev_priv->gt.retire_work, 2732 round_jiffies_up_relative(HZ)); 2733 } 2734 } 2735 2736 static void 2737 i915_gem_idle_work_handler(struct work_struct *work) 2738 { 2739 struct drm_i915_private *dev_priv = 2740 container_of(work, typeof(*dev_priv), gt.idle_work.work); 2741 struct drm_device *dev = &dev_priv->drm; 2742 struct intel_engine_cs *engine; 2743 unsigned int stuck_engines; 2744 bool rearm_hangcheck; 2745 2746 if (!READ_ONCE(dev_priv->gt.awake)) 2747 return; 2748 2749 if (READ_ONCE(dev_priv->gt.active_engines)) 2750 return; 2751 2752 rearm_hangcheck = 2753 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 2754 2755 if (!mutex_trylock(&dev->struct_mutex)) { 2756 /* Currently busy, come back later */ 2757 mod_delayed_work(dev_priv->wq, 2758 &dev_priv->gt.idle_work, 2759 msecs_to_jiffies(50)); 2760 goto out_rearm; 2761 } 2762 2763 if (dev_priv->gt.active_engines) 2764 goto out_unlock; 2765 2766 for_each_engine(engine, dev_priv) 2767 i915_gem_batch_pool_fini(&engine->batch_pool); 2768 2769 GEM_BUG_ON(!dev_priv->gt.awake); 2770 dev_priv->gt.awake = false; 2771 rearm_hangcheck = false; 2772 2773 /* As we have disabled hangcheck, we need to unstick any waiters still 2774 * hanging around. However, as we may be racing against the interrupt 2775 * handler or the waiters themselves, we skip enabling the fake-irq. 
2776 */ 2777 stuck_engines = intel_kick_waiters(dev_priv); 2778 if (unlikely(stuck_engines)) 2779 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", 2780 stuck_engines); 2781 2782 if (INTEL_GEN(dev_priv) >= 6) 2783 gen6_rps_idle(dev_priv); 2784 intel_runtime_pm_put(dev_priv); 2785 out_unlock: 2786 mutex_unlock(&dev->struct_mutex); 2787 2788 out_rearm: 2789 if (rearm_hangcheck) { 2790 GEM_BUG_ON(!dev_priv->gt.awake); 2791 i915_queue_hangcheck(dev_priv); 2792 } 2793 } 2794 2795 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 2796 { 2797 struct drm_i915_gem_object *obj = to_intel_bo(gem); 2798 struct drm_i915_file_private *fpriv = file->driver_priv; 2799 struct i915_vma *vma, *vn; 2800 2801 mutex_lock(&obj->base.dev->struct_mutex); 2802 list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) 2803 if (vma->vm->file == fpriv) 2804 i915_vma_close(vma); 2805 mutex_unlock(&obj->base.dev->struct_mutex); 2806 } 2807 2808 /** 2809 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2810 * @dev: drm device pointer 2811 * @data: ioctl data blob 2812 * @file: drm file pointer 2813 * 2814 * Returns 0 if successful, else an error is returned with the remaining time in 2815 * the timeout parameter. 2816 * -ETIME: object is still busy after timeout 2817 * -ERESTARTSYS: signal interrupted the wait 2818 * -ENOENT: object doesn't exist 2819 * Also possible, but rare: 2820 * -EAGAIN: GPU wedged 2821 * -ENOMEM: damn 2822 * -ENODEV: Internal IRQ fail 2823 * -E?: The add request failed 2824 * 2825 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2826 * non-zero timeout parameter the wait ioctl will wait for the given number of 2827 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2828 * without holding struct_mutex the object may become re-busied before this 2829 * function completes. A similar but shorter race condition exists in the busy 2830 * ioctl. 2831 */ 2832 int 2833 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2834 { 2835 struct drm_i915_gem_wait *args = data; 2836 struct intel_rps_client *rps = to_rps_client(file); 2837 struct drm_i915_gem_object *obj; 2838 unsigned long active; 2839 int idx, ret = 0; 2840 2841 if (args->flags != 0) 2842 return -EINVAL; 2843 2844 obj = i915_gem_object_lookup(file, args->bo_handle); 2845 if (!obj) 2846 return -ENOENT; 2847 2848 active = __I915_BO_ACTIVE(obj); 2849 for_each_active(active, idx) { 2850 s64 *timeout = args->timeout_ns >= 0 ?
&args->timeout_ns : NULL; 2851 ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true, 2852 timeout, rps); 2853 if (ret) 2854 break; 2855 } 2856 2857 i915_gem_object_put_unlocked(obj); 2858 return ret; 2859 } 2860 2861 static int 2862 __i915_gem_object_sync(struct drm_i915_gem_request *to, 2863 struct drm_i915_gem_request *from) 2864 { 2865 int ret; 2866 2867 if (to->engine == from->engine) 2868 return 0; 2869 2870 if (!i915.semaphores) { 2871 ret = i915_wait_request(from, 2872 from->i915->mm.interruptible, 2873 NULL, 2874 NO_WAITBOOST); 2875 if (ret) 2876 return ret; 2877 } else { 2878 int idx = intel_engine_sync_index(from->engine, to->engine); 2879 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) 2880 return 0; 2881 2882 trace_i915_gem_ring_sync_to(to, from); 2883 ret = to->engine->semaphore.sync_to(to, from); 2884 if (ret) 2885 return ret; 2886 2887 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; 2888 } 2889 2890 return 0; 2891 } 2892 2893 /** 2894 * i915_gem_object_sync - sync an object to a ring. 2895 * 2896 * @obj: object which may be in use on another ring. 2897 * @to: request we are wishing to use 2898 * 2899 * This code is meant to abstract object synchronization with the GPU. 2900 * Conceptually we serialise writes between engines inside the GPU. 2901 * We only allow one engine to write into a buffer at any time, but 2902 * multiple readers. To ensure each has a coherent view of memory, we must: 2903 * 2904 * - If there is an outstanding write request to the object, the new 2905 * request must wait for it to complete (either CPU or in hw, requests 2906 * on the same ring will be naturally ordered). 2907 * 2908 * - If we are a write request (pending_write_domain is set), the new 2909 * request must wait for outstanding read requests to complete. 2910 * 2911 * Returns 0 if successful, else propagates up the lower layer error. 
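 *
 * For example (illustrative only), a request that will write to an object
 * still being read on another engine must wait for, or semaphore-sync to,
 * each outstanding read before it is executed.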
2912 */ 2913 int 2914 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2915 struct drm_i915_gem_request *to) 2916 { 2917 struct i915_gem_active *active; 2918 unsigned long active_mask; 2919 int idx; 2920 2921 lockdep_assert_held(&obj->base.dev->struct_mutex); 2922 2923 active_mask = i915_gem_object_get_active(obj); 2924 if (!active_mask) 2925 return 0; 2926 2927 if (obj->base.pending_write_domain) { 2928 active = obj->last_read; 2929 } else { 2930 active_mask = 1; 2931 active = &obj->last_write; 2932 } 2933 2934 for_each_active(active_mask, idx) { 2935 struct drm_i915_gem_request *request; 2936 int ret; 2937 2938 request = i915_gem_active_peek(&active[idx], 2939 &obj->base.dev->struct_mutex); 2940 if (!request) 2941 continue; 2942 2943 ret = __i915_gem_object_sync(to, request); 2944 if (ret) 2945 return ret; 2946 } 2947 2948 return 0; 2949 } 2950 2951 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2952 { 2953 u32 old_write_domain, old_read_domains; 2954 2955 /* Force a pagefault for domain tracking on next user access */ 2956 i915_gem_release_mmap(obj); 2957 2958 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2959 return; 2960 2961 old_read_domains = obj->base.read_domains; 2962 old_write_domain = obj->base.write_domain; 2963 2964 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2965 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2966 2967 trace_i915_gem_object_change_domain(obj, 2968 old_read_domains, 2969 old_write_domain); 2970 } 2971 2972 static void __i915_vma_iounmap(struct i915_vma *vma) 2973 { 2974 GEM_BUG_ON(i915_vma_is_pinned(vma)); 2975 2976 if (vma->iomap == NULL) 2977 return; 2978 2979 io_mapping_unmap(vma->iomap); 2980 vma->iomap = NULL; 2981 } 2982 2983 int i915_vma_unbind(struct i915_vma *vma) 2984 { 2985 struct drm_i915_gem_object *obj = vma->obj; 2986 unsigned long active; 2987 int ret; 2988 2989 /* First wait upon any activity as retiring the request may 2990 * have side-effects such as unpinning or even unbinding this vma. 2991 */ 2992 active = i915_vma_get_active(vma); 2993 if (active) { 2994 int idx; 2995 2996 /* When a closed VMA is retired, it is unbound - eek. 2997 * In order to prevent it from being recursively closed, 2998 * take a pin on the vma so that the second unbind is 2999 * aborted. 
3000 */ 3001 __i915_vma_pin(vma); 3002 3003 for_each_active(active, idx) { 3004 ret = i915_gem_active_retire(&vma->last_read[idx], 3005 &vma->vm->dev->struct_mutex); 3006 if (ret) 3007 break; 3008 } 3009 3010 __i915_vma_unpin(vma); 3011 if (ret) 3012 return ret; 3013 3014 GEM_BUG_ON(i915_vma_is_active(vma)); 3015 } 3016 3017 if (i915_vma_is_pinned(vma)) 3018 return -EBUSY; 3019 3020 if (!drm_mm_node_allocated(&vma->node)) 3021 goto destroy; 3022 3023 GEM_BUG_ON(obj->bind_count == 0); 3024 GEM_BUG_ON(!obj->pages); 3025 3026 if (i915_vma_is_ggtt(vma) && 3027 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3028 i915_gem_object_finish_gtt(obj); 3029 3030 /* release the fence reg _after_ flushing */ 3031 ret = i915_gem_object_put_fence(obj); 3032 if (ret) 3033 return ret; 3034 3035 __i915_vma_iounmap(vma); 3036 } 3037 3038 if (likely(!vma->vm->closed)) { 3039 trace_i915_vma_unbind(vma); 3040 vma->vm->unbind_vma(vma); 3041 } 3042 vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 3043 3044 drm_mm_remove_node(&vma->node); 3045 list_move_tail(&vma->vm_link, &vma->vm->unbound_list); 3046 3047 if (i915_vma_is_ggtt(vma)) { 3048 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3049 obj->map_and_fenceable = false; 3050 } else if (vma->ggtt_view.pages) { 3051 sg_free_table(vma->ggtt_view.pages); 3052 kfree(vma->ggtt_view.pages); 3053 } 3054 vma->ggtt_view.pages = NULL; 3055 } 3056 3057 /* Since the unbound list is global, only move to that list if 3058 * no more VMAs exist. */ 3059 if (--obj->bind_count == 0) 3060 list_move_tail(&obj->global_list, 3061 &to_i915(obj->base.dev)->mm.unbound_list); 3062 3063 /* And finally now the object is completely decoupled from this vma, 3064 * we can drop its hold on the backing storage and allow it to be 3065 * reaped by the shrinker. 3066 */ 3067 i915_gem_object_unpin_pages(obj); 3068 3069 destroy: 3070 if (unlikely(i915_vma_is_closed(vma))) 3071 i915_vma_destroy(vma); 3072 3073 return 0; 3074 } 3075 3076 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, 3077 bool interruptible) 3078 { 3079 struct intel_engine_cs *engine; 3080 int ret; 3081 3082 for_each_engine(engine, dev_priv) { 3083 if (engine->last_context == NULL) 3084 continue; 3085 3086 ret = intel_engine_idle(engine, interruptible); 3087 if (ret) 3088 return ret; 3089 } 3090 3091 return 0; 3092 } 3093 3094 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3095 unsigned long cache_level) 3096 { 3097 struct drm_mm_node *gtt_space = &vma->node; 3098 struct drm_mm_node *other; 3099 3100 /* 3101 * On some machines we have to be careful when putting differing types 3102 * of snoopable memory together to avoid the prefetcher crossing memory 3103 * domains and dying. During vm initialisation, we decide whether or not 3104 * these constraints apply and set the drm_mm.color_adjust 3105 * appropriately. 
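 *
 * As an illustration: with color_adjust set, an uncached node placed
 * immediately next to a snooped node with no guard hole between them is
 * rejected by the neighbour checks below.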
3106 */ 3107 if (vma->vm->mm.color_adjust == NULL) 3108 return true; 3109 3110 if (!drm_mm_node_allocated(gtt_space)) 3111 return true; 3112 3113 if (list_empty(&gtt_space->node_list)) 3114 return true; 3115 3116 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3117 if (other->allocated && !other->hole_follows && other->color != cache_level) 3118 return false; 3119 3120 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3121 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3122 return false; 3123 3124 return true; 3125 } 3126 3127 /** 3128 * i915_vma_insert - finds a slot for the vma in its address space 3129 * @vma: the vma 3130 * @size: requested size in bytes (can be larger than the VMA) 3131 * @alignment: required alignment 3132 * @flags: mask of PIN_* flags to use 3133 * 3134 * First we try to allocate some free space that meets the requirements for 3135 * the VMA. Failing that, if the flags permit, it will evict an old VMA, 3136 * preferably the oldest idle entry to make room for the new VMA. 3137 * 3138 * Returns: 3139 * 0 on success, negative error code otherwise. 3140 */ 3141 static int 3142 i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) 3143 { 3144 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); 3145 struct drm_i915_gem_object *obj = vma->obj; 3146 u64 start, end; 3147 u64 min_alignment; 3148 int ret; 3149 3150 GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); 3151 GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); 3152 3153 size = max(size, vma->size); 3154 if (flags & PIN_MAPPABLE) 3155 size = i915_gem_get_ggtt_size(dev_priv, size, 3156 i915_gem_object_get_tiling(obj)); 3157 3158 min_alignment = 3159 i915_gem_get_ggtt_alignment(dev_priv, size, 3160 i915_gem_object_get_tiling(obj), 3161 flags & PIN_MAPPABLE); 3162 if (alignment == 0) 3163 alignment = min_alignment; 3164 if (alignment & (min_alignment - 1)) { 3165 DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", 3166 alignment, min_alignment); 3167 return -EINVAL; 3168 } 3169 3170 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3171 3172 end = vma->vm->total; 3173 if (flags & PIN_MAPPABLE) 3174 end = min_t(u64, end, dev_priv->ggtt.mappable_end); 3175 if (flags & PIN_ZONE_4G) 3176 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); 3177 3178 /* If binding the object/GGTT view requires more space than the entire 3179 * aperture has, reject it early before evicting everything in a vain 3180 * attempt to find space. 3181 */ 3182 if (size > end) { 3183 DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", 3184 size, obj->base.size, 3185 flags & PIN_MAPPABLE ?
"mappable" : "total", 3186 end); 3187 return -E2BIG; 3188 } 3189 3190 ret = i915_gem_object_get_pages(obj); 3191 if (ret) 3192 return ret; 3193 3194 i915_gem_object_pin_pages(obj); 3195 3196 if (flags & PIN_OFFSET_FIXED) { 3197 u64 offset = flags & PIN_OFFSET_MASK; 3198 if (offset & (alignment - 1) || offset > end - size) { 3199 ret = -EINVAL; 3200 goto err_unpin; 3201 } 3202 3203 vma->node.start = offset; 3204 vma->node.size = size; 3205 vma->node.color = obj->cache_level; 3206 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); 3207 if (ret) { 3208 ret = i915_gem_evict_for_vma(vma); 3209 if (ret == 0) 3210 ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); 3211 if (ret) 3212 goto err_unpin; 3213 } 3214 } else { 3215 u32 search_flag, alloc_flag; 3216 3217 if (flags & PIN_HIGH) { 3218 search_flag = DRM_MM_SEARCH_BELOW; 3219 alloc_flag = DRM_MM_CREATE_TOP; 3220 } else { 3221 search_flag = DRM_MM_SEARCH_DEFAULT; 3222 alloc_flag = DRM_MM_CREATE_DEFAULT; 3223 } 3224 3225 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, 3226 * so we know that we always have a minimum alignment of 4096. 3227 * The drm_mm range manager is optimised to return results 3228 * with zero alignment, so where possible use the optimal 3229 * path. 3230 */ 3231 if (alignment <= 4096) 3232 alignment = 0; 3233 3234 search_free: 3235 ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, 3236 &vma->node, 3237 size, alignment, 3238 obj->cache_level, 3239 start, end, 3240 search_flag, 3241 alloc_flag); 3242 if (ret) { 3243 ret = i915_gem_evict_something(vma->vm, size, alignment, 3244 obj->cache_level, 3245 start, end, 3246 flags); 3247 if (ret == 0) 3248 goto search_free; 3249 3250 goto err_unpin; 3251 } 3252 } 3253 GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); 3254 3255 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3256 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3257 obj->bind_count++; 3258 3259 return 0; 3260 3261 err_unpin: 3262 i915_gem_object_unpin_pages(obj); 3263 return ret; 3264 } 3265 3266 bool 3267 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3268 bool force) 3269 { 3270 /* If we don't have a page list set up, then we're not pinned 3271 * to GPU, and we can ignore the cache flush because it'll happen 3272 * again at bind time. 3273 */ 3274 if (obj->pages == NULL) 3275 return false; 3276 3277 /* 3278 * Stolen memory is always coherent with the GPU as it is explicitly 3279 * marked as wc by the system, or the system is cache-coherent. 3280 */ 3281 if (obj->stolen || obj->phys_handle) 3282 return false; 3283 3284 /* If the GPU is snooping the contents of the CPU cache, 3285 * we do not need to manually clear the CPU cache lines. However, 3286 * the caches are only snooped when the render cache is 3287 * flushed/invalidated. As we always have to emit invalidations 3288 * and flushes when moving into and out of the RENDER domain, correct 3289 * snooping behaviour occurs naturally as the result of our domain 3290 * tracking. 3291 */ 3292 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 3293 obj->cache_dirty = true; 3294 return false; 3295 } 3296 3297 trace_i915_gem_object_clflush(obj); 3298 drm_clflush_sg(obj->pages); 3299 obj->cache_dirty = false; 3300 3301 return true; 3302 } 3303 3304 /** Flushes the GTT write domain for the object if it's dirty. 
*/ 3305 static void 3306 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3307 { 3308 uint32_t old_write_domain; 3309 3310 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3311 return; 3312 3313 /* No actual flushing is required for the GTT write domain. Writes 3314 * to it immediately go to main memory as far as we know, so there's 3315 * no chipset flush. It also doesn't land in render cache. 3316 * 3317 * However, we do have to enforce the order so that all writes through 3318 * the GTT land before any writes to the device, such as updates to 3319 * the GATT itself. 3320 */ 3321 wmb(); 3322 3323 old_write_domain = obj->base.write_domain; 3324 obj->base.write_domain = 0; 3325 3326 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 3327 3328 trace_i915_gem_object_change_domain(obj, 3329 obj->base.read_domains, 3330 old_write_domain); 3331 } 3332 3333 /** Flushes the CPU write domain for the object if it's dirty. */ 3334 static void 3335 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3336 { 3337 uint32_t old_write_domain; 3338 3339 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3340 return; 3341 3342 if (i915_gem_clflush_object(obj, obj->pin_display)) 3343 i915_gem_chipset_flush(to_i915(obj->base.dev)); 3344 3345 old_write_domain = obj->base.write_domain; 3346 obj->base.write_domain = 0; 3347 3348 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 3349 3350 trace_i915_gem_object_change_domain(obj, 3351 obj->base.read_domains, 3352 old_write_domain); 3353 } 3354 3355 /** 3356 * Moves a single object to the GTT read, and possibly write domain. 3357 * @obj: object to act on 3358 * @write: ask for write access or read only 3359 * 3360 * This function returns when the move is complete, including waiting on 3361 * flushes to occur. 3362 */ 3363 int 3364 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3365 { 3366 uint32_t old_write_domain, old_read_domains; 3367 struct i915_vma *vma; 3368 int ret; 3369 3370 ret = i915_gem_object_wait_rendering(obj, !write); 3371 if (ret) 3372 return ret; 3373 3374 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3375 return 0; 3376 3377 /* Flush and acquire obj->pages so that we are coherent through 3378 * direct access in memory with previous cached writes through 3379 * shmemfs and that our cache domain tracking remains valid. 3380 * For example, if the obj->filp was moved to swap without us 3381 * being notified and releasing the pages, we would mistakenly 3382 * continue to assume that the obj remained out of the CPU cached 3383 * domain. 3384 */ 3385 ret = i915_gem_object_get_pages(obj); 3386 if (ret) 3387 return ret; 3388 3389 i915_gem_object_flush_cpu_write_domain(obj); 3390 3391 /* Serialise direct access to this object with the barriers for 3392 * coherent writes from the GPU, by effectively invalidating the 3393 * GTT domain upon first access. 3394 */ 3395 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3396 mb(); 3397 3398 old_write_domain = obj->base.write_domain; 3399 old_read_domains = obj->base.read_domains; 3400 3401 /* It should now be out of any other write domains, and we can update 3402 * the domain values for our changes. 
3403 */ 3404 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3405 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3406 if (write) { 3407 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3408 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3409 obj->dirty = 1; 3410 } 3411 3412 trace_i915_gem_object_change_domain(obj, 3413 old_read_domains, 3414 old_write_domain); 3415 3416 /* And bump the LRU for this access */ 3417 vma = i915_gem_obj_to_ggtt(obj); 3418 if (vma && 3419 drm_mm_node_allocated(&vma->node) && 3420 !i915_vma_is_active(vma)) 3421 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3422 3423 return 0; 3424 } 3425 3426 /** 3427 * Changes the cache-level of an object across all VMA. 3428 * @obj: object to act on 3429 * @cache_level: new cache level to set for the object 3430 * 3431 * After this function returns, the object will be in the new cache-level 3432 * across all GTT and the contents of the backing storage will be coherent, 3433 * with respect to the new cache-level. In order to keep the backing storage 3434 * coherent for all users, we only allow a single cache level to be set 3435 * globally on the object and prevent it from being changed whilst the 3436 * hardware is reading from the object. That is if the object is currently 3437 * on the scanout it will be set to uncached (or equivalent display 3438 * cache coherency) and all non-MOCS GPU access will also be uncached so 3439 * that all direct access to the scanout remains coherent. 3440 */ 3441 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3442 enum i915_cache_level cache_level) 3443 { 3444 struct i915_vma *vma; 3445 int ret = 0; 3446 3447 if (obj->cache_level == cache_level) 3448 goto out; 3449 3450 /* Inspect the list of currently bound VMA and unbind any that would 3451 * be invalid given the new cache-level. This is principally to 3452 * catch the issue of the CS prefetch crossing page boundaries and 3453 * reading an invalid PTE on older architectures. 3454 */ 3455 restart: 3456 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3457 if (!drm_mm_node_allocated(&vma->node)) 3458 continue; 3459 3460 if (i915_vma_is_pinned(vma)) { 3461 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3462 return -EBUSY; 3463 } 3464 3465 if (i915_gem_valid_gtt_space(vma, cache_level)) 3466 continue; 3467 3468 ret = i915_vma_unbind(vma); 3469 if (ret) 3470 return ret; 3471 3472 /* As unbinding may affect other elements in the 3473 * obj->vma_list (due to side-effects from retiring 3474 * an active vma), play safe and restart the iterator. 3475 */ 3476 goto restart; 3477 } 3478 3479 /* We can reuse the existing drm_mm nodes but need to change the 3480 * cache-level on the PTE. We could simply unbind them all and 3481 * rebind with the correct cache-level on next use. However since 3482 * we already have a valid slot, dma mapping, pages etc, we may as 3483 * rewrite the PTE in the belief that doing so tramples upon less 3484 * state and so involves less work. 3485 */ 3486 if (obj->bind_count) { 3487 /* Before we change the PTE, the GPU must not be accessing it. 3488 * If we wait upon the object, we know that all the bound 3489 * VMA are no longer active. 3490 */ 3491 ret = i915_gem_object_wait_rendering(obj, false); 3492 if (ret) 3493 return ret; 3494 3495 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { 3496 /* Access to snoopable pages through the GTT is 3497 * incoherent and on some machines causes a hard 3498 * lockup. 
Relinquish the CPU mmaping to force 3499 * userspace to refault in the pages and we can 3500 * then double check if the GTT mapping is still 3501 * valid for that pointer access. 3502 */ 3503 i915_gem_release_mmap(obj); 3504 3505 /* As we no longer need a fence for GTT access, 3506 * we can relinquish it now (and so prevent having 3507 * to steal a fence from someone else on the next 3508 * fence request). Note GPU activity would have 3509 * dropped the fence as all snoopable access is 3510 * supposed to be linear. 3511 */ 3512 ret = i915_gem_object_put_fence(obj); 3513 if (ret) 3514 return ret; 3515 } else { 3516 /* We either have incoherent backing store and 3517 * so no GTT access or the architecture is fully 3518 * coherent. In such cases, existing GTT mmaps 3519 * ignore the cache bit in the PTE and we can 3520 * rewrite it without confusing the GPU or having 3521 * to force userspace to fault back in its mmaps. 3522 */ 3523 } 3524 3525 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3526 if (!drm_mm_node_allocated(&vma->node)) 3527 continue; 3528 3529 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 3530 if (ret) 3531 return ret; 3532 } 3533 } 3534 3535 list_for_each_entry(vma, &obj->vma_list, obj_link) 3536 vma->node.color = cache_level; 3537 obj->cache_level = cache_level; 3538 3539 out: 3540 /* Flush the dirty CPU caches to the backing storage so that the 3541 * object is now coherent at its new cache level (with respect 3542 * to the access domain). 3543 */ 3544 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) { 3545 if (i915_gem_clflush_object(obj, true)) 3546 i915_gem_chipset_flush(to_i915(obj->base.dev)); 3547 } 3548 3549 return 0; 3550 } 3551 3552 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3553 struct drm_file *file) 3554 { 3555 struct drm_i915_gem_caching *args = data; 3556 struct drm_i915_gem_object *obj; 3557 3558 obj = i915_gem_object_lookup(file, args->handle); 3559 if (!obj) 3560 return -ENOENT; 3561 3562 switch (obj->cache_level) { 3563 case I915_CACHE_LLC: 3564 case I915_CACHE_L3_LLC: 3565 args->caching = I915_CACHING_CACHED; 3566 break; 3567 3568 case I915_CACHE_WT: 3569 args->caching = I915_CACHING_DISPLAY; 3570 break; 3571 3572 default: 3573 args->caching = I915_CACHING_NONE; 3574 break; 3575 } 3576 3577 i915_gem_object_put_unlocked(obj); 3578 return 0; 3579 } 3580 3581 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3582 struct drm_file *file) 3583 { 3584 struct drm_i915_private *dev_priv = to_i915(dev); 3585 struct drm_i915_gem_caching *args = data; 3586 struct drm_i915_gem_object *obj; 3587 enum i915_cache_level level; 3588 int ret; 3589 3590 switch (args->caching) { 3591 case I915_CACHING_NONE: 3592 level = I915_CACHE_NONE; 3593 break; 3594 case I915_CACHING_CACHED: 3595 /* 3596 * Due to a HW issue on BXT A stepping, GPU stores via a 3597 * snooped mapping may leave stale data in a corresponding CPU 3598 * cacheline, whereas normally such cachelines would get 3599 * invalidated. 3600 */ 3601 if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) 3602 return -ENODEV; 3603 3604 level = I915_CACHE_LLC; 3605 break; 3606 case I915_CACHING_DISPLAY: 3607 level = HAS_WT(dev) ? 
I915_CACHE_WT : I915_CACHE_NONE; 3608 break; 3609 default: 3610 return -EINVAL; 3611 } 3612 3613 intel_runtime_pm_get(dev_priv); 3614 3615 ret = i915_mutex_lock_interruptible(dev); 3616 if (ret) 3617 goto rpm_put; 3618 3619 obj = i915_gem_object_lookup(file, args->handle); 3620 if (!obj) { 3621 ret = -ENOENT; 3622 goto unlock; 3623 } 3624 3625 ret = i915_gem_object_set_cache_level(obj, level); 3626 3627 i915_gem_object_put(obj); 3628 unlock: 3629 mutex_unlock(&dev->struct_mutex); 3630 rpm_put: 3631 intel_runtime_pm_put(dev_priv); 3632 3633 return ret; 3634 } 3635 3636 /* 3637 * Prepare buffer for display plane (scanout, cursors, etc). 3638 * Can be called from an uninterruptible phase (modesetting) and allows 3639 * any flushes to be pipelined (for pageflips). 3640 */ 3641 int 3642 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3643 u32 alignment, 3644 const struct i915_ggtt_view *view) 3645 { 3646 u32 old_read_domains, old_write_domain; 3647 int ret; 3648 3649 /* Mark the pin_display early so that we account for the 3650 * display coherency whilst setting up the cache domains. 3651 */ 3652 obj->pin_display++; 3653 3654 /* The display engine is not coherent with the LLC cache on gen6. As 3655 * a result, we make sure that the pinning that is about to occur is 3656 * done with uncached PTEs. This is lowest common denominator for all 3657 * chipsets. 3658 * 3659 * However for gen6+, we could do better by using the GFDT bit instead 3660 * of uncaching, which would allow us to flush all the LLC-cached data 3661 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3662 */ 3663 ret = i915_gem_object_set_cache_level(obj, 3664 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 3665 if (ret) 3666 goto err_unpin_display; 3667 3668 /* As the user may map the buffer once pinned in the display plane 3669 * (e.g. libkms for the bootup splash), we have to ensure that we 3670 * always use map_and_fenceable for all scanout buffers. 3671 */ 3672 ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 3673 view->type == I915_GGTT_VIEW_NORMAL ? 3674 PIN_MAPPABLE : 0); 3675 if (ret) 3676 goto err_unpin_display; 3677 3678 i915_gem_object_flush_cpu_write_domain(obj); 3679 3680 old_write_domain = obj->base.write_domain; 3681 old_read_domains = obj->base.read_domains; 3682 3683 /* It should now be out of any other write domains, and we can update 3684 * the domain values for our changes. 3685 */ 3686 obj->base.write_domain = 0; 3687 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3688 3689 trace_i915_gem_object_change_domain(obj, 3690 old_read_domains, 3691 old_write_domain); 3692 3693 return 0; 3694 3695 err_unpin_display: 3696 obj->pin_display--; 3697 return ret; 3698 } 3699 3700 void 3701 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 3702 const struct i915_ggtt_view *view) 3703 { 3704 if (WARN_ON(obj->pin_display == 0)) 3705 return; 3706 3707 i915_gem_object_ggtt_unpin_view(obj, view); 3708 3709 obj->pin_display--; 3710 } 3711 3712 /** 3713 * Moves a single object to the CPU read, and possibly write domain. 3714 * @obj: object to act on 3715 * @write: requesting write or read-only access 3716 * 3717 * This function returns when the move is complete, including waiting on 3718 * flushes to occur. 
3719 */ 3720 int 3721 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3722 { 3723 uint32_t old_write_domain, old_read_domains; 3724 int ret; 3725 3726 ret = i915_gem_object_wait_rendering(obj, !write); 3727 if (ret) 3728 return ret; 3729 3730 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3731 return 0; 3732 3733 i915_gem_object_flush_gtt_write_domain(obj); 3734 3735 old_write_domain = obj->base.write_domain; 3736 old_read_domains = obj->base.read_domains; 3737 3738 /* Flush the CPU cache if it's still invalid. */ 3739 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3740 i915_gem_clflush_object(obj, false); 3741 3742 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3743 } 3744 3745 /* It should now be out of any other write domains, and we can update 3746 * the domain values for our changes. 3747 */ 3748 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3749 3750 /* If we're writing through the CPU, then the GPU read domains will 3751 * need to be invalidated at next use. 3752 */ 3753 if (write) { 3754 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3755 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3756 } 3757 3758 trace_i915_gem_object_change_domain(obj, 3759 old_read_domains, 3760 old_write_domain); 3761 3762 return 0; 3763 } 3764 3765 /* Throttle our rendering by waiting until the ring has completed our requests 3766 * emitted over 20 msec ago. 3767 * 3768 * Note that if we were to use the current jiffies each time around the loop, 3769 * we wouldn't escape the function with any frames outstanding if the time to 3770 * render a frame was over 20ms. 3771 * 3772 * This should get us reasonable parallelism between CPU and GPU but also 3773 * relatively low latency when blocking on a particular request to finish. 3774 */ 3775 static int 3776 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3777 { 3778 struct drm_i915_private *dev_priv = to_i915(dev); 3779 struct drm_i915_file_private *file_priv = file->driver_priv; 3780 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 3781 struct drm_i915_gem_request *request, *target = NULL; 3782 int ret; 3783 3784 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 3785 if (ret) 3786 return ret; 3787 3788 /* ABI: return -EIO if already wedged */ 3789 if (i915_terminally_wedged(&dev_priv->gpu_error)) 3790 return -EIO; 3791 3792 lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE); 3793 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3794 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3795 break; 3796 3797 /* 3798 * Note that the request might not have been submitted yet. 3799 * In which case emitted_jiffies will be zero. 
3800 */ 3801 if (!request->emitted_jiffies) 3802 continue; 3803 3804 target = request; 3805 } 3806 if (target) 3807 i915_gem_request_get(target); 3808 lockmgr(&file_priv->mm.lock, LK_RELEASE); 3809 3810 if (target == NULL) 3811 return 0; 3812 3813 ret = i915_wait_request(target, true, NULL, NULL); 3814 i915_gem_request_put(target); 3815 3816 return ret; 3817 } 3818 3819 static bool 3820 i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) 3821 { 3822 struct drm_i915_gem_object *obj = vma->obj; 3823 3824 if (!drm_mm_node_allocated(&vma->node)) 3825 return false; 3826 3827 if (vma->node.size < size) 3828 return true; 3829 3830 if (alignment && vma->node.start & (alignment - 1)) 3831 return true; 3832 3833 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 3834 return true; 3835 3836 if (flags & PIN_OFFSET_BIAS && 3837 vma->node.start < (flags & PIN_OFFSET_MASK)) 3838 return true; 3839 3840 if (flags & PIN_OFFSET_FIXED && 3841 vma->node.start != (flags & PIN_OFFSET_MASK)) 3842 return true; 3843 3844 return false; 3845 } 3846 3847 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 3848 { 3849 struct drm_i915_gem_object *obj = vma->obj; 3850 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 3851 bool mappable, fenceable; 3852 u32 fence_size, fence_alignment; 3853 3854 fence_size = i915_gem_get_ggtt_size(dev_priv, 3855 obj->base.size, 3856 i915_gem_object_get_tiling(obj)); 3857 fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, 3858 obj->base.size, 3859 i915_gem_object_get_tiling(obj), 3860 true); 3861 3862 fenceable = (vma->node.size == fence_size && 3863 (vma->node.start & (fence_alignment - 1)) == 0); 3864 3865 mappable = (vma->node.start + fence_size <= 3866 dev_priv->ggtt.mappable_end); 3867 3868 obj->map_and_fenceable = mappable && fenceable; 3869 } 3870 3871 int __i915_vma_do_pin(struct i915_vma *vma, 3872 u64 size, u64 alignment, u64 flags) 3873 { 3874 unsigned int bound = vma->flags; 3875 int ret; 3876 3877 GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); 3878 GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); 3879 3880 if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { 3881 ret = -EBUSY; 3882 goto err; 3883 } 3884 3885 if ((bound & I915_VMA_BIND_MASK) == 0) { 3886 ret = i915_vma_insert(vma, size, alignment, flags); 3887 if (ret) 3888 goto err; 3889 } 3890 3891 ret = i915_vma_bind(vma, vma->obj->cache_level, flags); 3892 if (ret) 3893 goto err; 3894 3895 if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) 3896 __i915_vma_set_map_and_fenceable(vma); 3897 3898 GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); 3899 return 0; 3900 3901 err: 3902 __i915_vma_unpin(vma); 3903 return ret; 3904 } 3905 3906 int 3907 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 3908 const struct i915_ggtt_view *view, 3909 u64 size, 3910 u64 alignment, 3911 u64 flags) 3912 { 3913 struct i915_vma *vma; 3914 int ret; 3915 3916 if (!view) 3917 view = &i915_ggtt_view_normal; 3918 3919 vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); 3920 if (IS_ERR(vma)) 3921 return PTR_ERR(vma); 3922 3923 if (i915_vma_misplaced(vma, size, alignment, flags)) { 3924 if (flags & PIN_NONBLOCK && 3925 (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) 3926 return -ENOSPC; 3927 3928 WARN(i915_vma_is_pinned(vma), 3929 "bo is already pinned in ggtt with incorrect alignment:" 3930 " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," 3931 " obj->map_and_fenceable=%d\n", 3932 upper_32_bits(vma->node.start), 3933 lower_32_bits(vma->node.start), 
		     alignment,
		     !!(flags & PIN_MAPPABLE),
		     obj->map_and_fenceable);
		ret = i915_vma_unbind(vma);
		if (ret)
			return ret;
	}

	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
}

void
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
				const struct i915_ggtt_view *view)
{
	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
}

static __always_inline unsigned int __busy_read_flag(unsigned int id)
{
	/* Note that we could alias engines in the execbuf API, but
	 * that would be very unwise as it prevents userspace from
	 * exercising fine control over engine selection. Ahem.
	 *
	 * This should be something like EXEC_MAX_ENGINE instead of
	 * I915_NUM_ENGINES.
	 */
	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
	return 0x10000 << id;
}

static __always_inline unsigned int __busy_write_id(unsigned int id)
{
	return id;
}

static __always_inline unsigned int
__busy_set_if_active(const struct i915_gem_active *active,
		     unsigned int (*flag)(unsigned int id))
{
	/* For more discussion about the barriers and locking concerns,
	 * see __i915_gem_active_get_rcu().
	 */
	do {
		struct drm_i915_gem_request *request;
		unsigned int id;

		request = rcu_dereference(active->request);
		if (!request || i915_gem_request_completed(request))
			return 0;

		id = request->engine->exec_id;

		/* Check that the pointer wasn't reassigned and overwritten.
		 *
		 * In __i915_gem_active_get_rcu(), we enforce ordering between
		 * the first rcu pointer dereference (imposing a
		 * read-dependency only on access through the pointer) and
		 * the second lockless access through the memory barrier
		 * following a successful atomic_inc_not_zero(). Here there
		 * is no such barrier, and so we must manually insert an
		 * explicit read barrier to ensure that the following
		 * access occurs after all the loads through the first
		 * pointer.
		 *
		 * It is worth comparing this sequence with
		 * raw_write_seqcount_latch() which operates very similarly.
		 * The challenge here is the visibility of the other CPU
		 * writes to the reallocated request vs the local CPU ordering.
		 * Before the other CPU can overwrite the request, it will
		 * have updated our active->request and gone through a wmb.
		 * During the read here, we want to make sure that the values
		 * we see have not been overwritten as we do so - and we do
		 * that by serialising the second pointer check with the writes
		 * on the other CPUs.
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
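		 *
		 * In outline, the check below amounts to (illustrative
		 * summary only, not additional code):
		 *
		 *	req = rcu_dereference(active->request);
		 *	id  = req->engine->exec_id;
		 *	smp_rmb();	order the loads above against ...
		 *	req == rcu_access_pointer(active->request)	... this re-check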
4012 */ 4013 smp_rmb(); 4014 if (request == rcu_access_pointer(active->request)) 4015 return flag(id); 4016 } while (1); 4017 } 4018 4019 static __always_inline unsigned int 4020 busy_check_reader(const struct i915_gem_active *active) 4021 { 4022 return __busy_set_if_active(active, __busy_read_flag); 4023 } 4024 4025 static __always_inline unsigned int 4026 busy_check_writer(const struct i915_gem_active *active) 4027 { 4028 return __busy_set_if_active(active, __busy_write_id); 4029 } 4030 4031 int 4032 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4033 struct drm_file *file) 4034 { 4035 struct drm_i915_gem_busy *args = data; 4036 struct drm_i915_gem_object *obj; 4037 unsigned long active; 4038 4039 obj = i915_gem_object_lookup(file, args->handle); 4040 if (!obj) 4041 return -ENOENT; 4042 4043 args->busy = 0; 4044 active = __I915_BO_ACTIVE(obj); 4045 if (active) { 4046 int idx; 4047 4048 /* Yes, the lookups are intentionally racy. 4049 * 4050 * First, we cannot simply rely on __I915_BO_ACTIVE. We have 4051 * to regard the value as stale and as our ABI guarantees 4052 * forward progress, we confirm the status of each active 4053 * request with the hardware. 4054 * 4055 * Even though we guard the pointer lookup by RCU, that only 4056 * guarantees that the pointer and its contents remain 4057 * dereferencable and does *not* mean that the request we 4058 * have is the same as the one being tracked by the object. 4059 * 4060 * Consider that we lookup the request just as it is being 4061 * retired and freed. We take a local copy of the pointer, 4062 * but before we add its engine into the busy set, the other 4063 * thread reallocates it and assigns it to a task on another 4064 * engine with a fresh and incomplete seqno. 4065 * 4066 * So after we lookup the engine's id, we double check that 4067 * the active request is the same and only then do we add it 4068 * into the busy set. 4069 */ 4070 rcu_read_lock(); 4071 4072 for_each_active(active, idx) 4073 args->busy |= busy_check_reader(&obj->last_read[idx]); 4074 4075 /* For ABI sanity, we only care that the write engine is in 4076 * the set of read engines. This is ensured by the ordering 4077 * of setting last_read/last_write in i915_vma_move_to_active, 4078 * and then in reverse in retire. 4079 * 4080 * We don't care that the set of active read/write engines 4081 * may change during construction of the result, as it is 4082 * equally liable to change before userspace can inspect 4083 * the result. 
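		 *
		 * For reference, a userspace consumer of this ioctl would
		 * decode args->busy roughly as follows (illustrative sketch,
		 * mirroring __busy_read_flag() and __busy_write_id() above):
		 *
		 *	write_engine = args.busy & 0xffff;	exec_id of the last writer
		 *	read_engines = args.busy >> 16;		bitmask of busy readers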
4084 */ 4085 args->busy |= busy_check_writer(&obj->last_write); 4086 4087 rcu_read_unlock(); 4088 } 4089 4090 i915_gem_object_put_unlocked(obj); 4091 return 0; 4092 } 4093 4094 int 4095 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4096 struct drm_file *file_priv) 4097 { 4098 return i915_gem_ring_throttle(dev, file_priv); 4099 } 4100 4101 int 4102 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4103 struct drm_file *file_priv) 4104 { 4105 struct drm_i915_private *dev_priv = to_i915(dev); 4106 struct drm_i915_gem_madvise *args = data; 4107 struct drm_i915_gem_object *obj; 4108 int ret; 4109 4110 switch (args->madv) { 4111 case I915_MADV_DONTNEED: 4112 case I915_MADV_WILLNEED: 4113 break; 4114 default: 4115 return -EINVAL; 4116 } 4117 4118 ret = i915_mutex_lock_interruptible(dev); 4119 if (ret) 4120 return ret; 4121 4122 obj = i915_gem_object_lookup(file_priv, args->handle); 4123 if (!obj) { 4124 ret = -ENOENT; 4125 goto unlock; 4126 } 4127 4128 if (obj->pages && 4129 i915_gem_object_is_tiled(obj) && 4130 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4131 if (obj->madv == I915_MADV_WILLNEED) 4132 i915_gem_object_unpin_pages(obj); 4133 if (args->madv == I915_MADV_WILLNEED) 4134 i915_gem_object_pin_pages(obj); 4135 } 4136 4137 if (obj->madv != __I915_MADV_PURGED) 4138 obj->madv = args->madv; 4139 4140 /* if the object is no longer attached, discard its backing storage */ 4141 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4142 i915_gem_object_truncate(obj); 4143 4144 args->retained = obj->madv != __I915_MADV_PURGED; 4145 4146 i915_gem_object_put(obj); 4147 unlock: 4148 mutex_unlock(&dev->struct_mutex); 4149 return ret; 4150 } 4151 4152 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4153 const struct drm_i915_gem_object_ops *ops) 4154 { 4155 int i; 4156 4157 INIT_LIST_HEAD(&obj->global_list); 4158 for (i = 0; i < I915_NUM_ENGINES; i++) 4159 init_request_active(&obj->last_read[i], 4160 i915_gem_object_retire__read); 4161 init_request_active(&obj->last_write, 4162 i915_gem_object_retire__write); 4163 init_request_active(&obj->last_fence, NULL); 4164 INIT_LIST_HEAD(&obj->obj_exec_link); 4165 INIT_LIST_HEAD(&obj->vma_list); 4166 INIT_LIST_HEAD(&obj->batch_pool_link); 4167 4168 obj->ops = ops; 4169 4170 obj->fence_reg = I915_FENCE_REG_NONE; 4171 obj->madv = I915_MADV_WILLNEED; 4172 4173 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4174 } 4175 4176 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4177 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, 4178 .get_pages = i915_gem_object_get_pages_gtt, 4179 .put_pages = i915_gem_object_put_pages_gtt, 4180 }; 4181 4182 struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev, 4183 size_t size) 4184 { 4185 struct drm_i915_gem_object *obj; 4186 #if 0 4187 struct address_space *mapping; 4188 gfp_t mask; 4189 #endif 4190 int ret; 4191 4192 obj = i915_gem_object_alloc(dev); 4193 if (obj == NULL) 4194 return ERR_PTR(-ENOMEM); 4195 4196 ret = drm_gem_object_init(dev, &obj->base, size); 4197 if (ret) 4198 goto fail; 4199 4200 #if 0 4201 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4202 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4203 /* 965gm cannot relocate objects above 4GiB. 
		 */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	}

	mapping = file_inode(obj->base.filp)->i_mapping;
	mapping_set_gfp_mask(mapping, mask);
#endif

	i915_gem_object_init(obj, &i915_gem_object_ops);

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	if (HAS_LLC(dev)) {
		/* On some devices, we can have the GPU use the LLC (the CPU
		 * cache) for about a 10% performance improvement
		 * compared to uncached. Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache. This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		obj->cache_level = I915_CACHE_LLC;
	} else
		obj->cache_level = I915_CACHE_NONE;

	trace_i915_gem_object_create(obj);

	return obj;

fail:
	i915_gem_object_free(obj);

	return ERR_PTR(ret);
}

static bool discard_backing_storage(struct drm_i915_gem_object *obj)
{
	/* If we are the last user of the backing storage (be it shmemfs
	 * pages or stolen etc), we know that the pages are going to be
	 * immediately released. In this case, we can then skip copying
	 * back the contents from the GPU.
	 */

	if (obj->madv != I915_MADV_WILLNEED)
		return false;

	if (obj->base.filp == NULL)
		return true;

	/* At first glance, this looks racy, but then again so would be
	 * userspace racing mmap against close. However, the first external
	 * reference to the filp can only be obtained through the
	 * i915_gem_mmap_ioctl() which safeguards us against the user
	 * acquiring such a reference whilst we are in the middle of
	 * freeing the object.
	 */
#if 0
	return atomic_long_read(&obj->base.filp->f_count) == 1;
#else
	return false;
#endif
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_vma *vma, *next;

	intel_runtime_pm_get(dev_priv);

	trace_i915_gem_object_destroy(obj);

	/* All file-owned VMA should have been released by this point through
	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
	 * However, the object may also be bound into the global GTT (e.g.
	 * older GPUs without per-process support, or for direct access through
	 * the GTT either for the user or for scanout). Those VMA still need to
	 * be unbound now.
	 */
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
		GEM_BUG_ON(i915_vma_is_active(vma));
		vma->flags &= ~I915_VMA_PIN_MASK;
		i915_vma_close(vma);
	}
	GEM_BUG_ON(obj->bind_count);

	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
	 * before progressing.
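	 * (That is, drop the extra pages pin that stolen objects acquire at
	 * creation, so the pin count can reach zero before the pages are
	 * released below.)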
	 */
	if (obj->stolen)
		i915_gem_object_unpin_pages(obj);

	WARN_ON(atomic_read(&obj->frontbuffer_bits));

	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
	    i915_gem_object_is_tiled(obj))
		i915_gem_object_unpin_pages(obj);

	if (WARN_ON(obj->pages_pin_count))
		obj->pages_pin_count = 0;
	if (discard_backing_storage(obj))
		obj->madv = I915_MADV_DONTNEED;
	i915_gem_object_put_pages(obj);

	BUG_ON(obj->pages);

#if 0
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
#endif

	if (obj->ops->release)
		obj->ops->release(obj);

	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);

	kfree(obj->bit_17);
	i915_gem_object_free(obj);

	intel_runtime_pm_put(dev_priv);
}

struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
				     struct i915_address_space *vm)
{
	struct i915_vma *vma;
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
		    vma->vm == vm)
			return vma;
	}
	return NULL;
}

struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
					   const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;

	GEM_BUG_ON(!view);

	list_for_each_entry(vma, &obj->vma_list, obj_link)
		if (i915_vma_is_ggtt(vma) &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma;
	return NULL;
}

int i915_gem_suspend(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	intel_suspend_gt_powersave(dev_priv);

	mutex_lock(&dev->struct_mutex);

	/* We have to flush all the executing contexts to main memory so
	 * that they can be saved in the hibernation image. To ensure the last
	 * context image is coherent, we have to switch away from it. That
	 * leaves the dev_priv->kernel_context still active when
	 * we actually suspend, and its image in memory may not match the GPU
	 * state. Fortunately, the kernel_context is disposable and we do
	 * not rely on its state.
	 */
	ret = i915_gem_switch_to_kernel_context(dev_priv);
	if (ret)
		goto err;

	ret = i915_gem_wait_for_idle(dev_priv, true);
	if (ret)
		goto err;

	i915_gem_retire_requests(dev_priv);

	i915_gem_context_lost(dev_priv);
	mutex_unlock(&dev->struct_mutex);

	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
	flush_delayed_work(&dev_priv->gt.idle_work);

	/* Assert that we successfully flushed all the work and
	 * reset the GPU back to its idle, low power state.
	 */
	WARN_ON(dev_priv->gt.awake);

	return 0;

err:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

void i915_gem_resume(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	mutex_lock(&dev->struct_mutex);
	i915_gem_restore_gtt_mappings(dev);

	/* As we didn't flush the kernel context before suspend, we cannot
	 * guarantee that the context image is complete. So let's just reset
	 * it and start again.
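	 * (With execlists enabled this amounts to re-initialising the ring
	 * state of the kernel context via intel_lr_context_reset() below.)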
	 */
	if (i915.enable_execlists)
		intel_lr_context_reset(dev_priv, dev_priv->kernel_context);

	mutex_unlock(&dev->struct_mutex);
}

void i915_gem_init_swizzling(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

	if (IS_GEN5(dev))
		return;

	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else if (IS_GEN7(dev))
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
	else if (IS_GEN8(dev))
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
	else
		BUG();
}

static void init_unused_ring(struct drm_device *dev, u32 base)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

static void init_unused_rings(struct drm_device *dev)
{
	if (IS_I830(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
		init_unused_ring(dev, SRB2_BASE);
		init_unused_ring(dev, SRB3_BASE);
	} else if (IS_GEN2(dev)) {
		init_unused_ring(dev, SRB0_BASE);
		init_unused_ring(dev, SRB1_BASE);
	} else if (IS_GEN3(dev)) {
		init_unused_ring(dev, PRB1_BASE);
		init_unused_ring(dev, PRB2_BASE);
	}
}

int
i915_gem_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct intel_engine_cs *engine;
	int ret;

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));

	if (IS_HASWELL(dev))
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	if (HAS_PCH_NOP(dev)) {
		if (IS_IVYBRIDGE(dev)) {
			u32 temp = I915_READ(GEN7_MSG_CTL);
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
			I915_WRITE(GEN7_MSG_CTL, temp);
		} else if (INTEL_INFO(dev)->gen >= 7) {
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
		}
	}

	i915_gem_init_swizzling(dev);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume which
	 * will prevent c3 entry. Make sure all unused rings
	 * are totally idle.
4512 */ 4513 init_unused_rings(dev); 4514 4515 BUG_ON(!dev_priv->kernel_context); 4516 4517 ret = i915_ppgtt_init_hw(dev); 4518 if (ret) { 4519 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 4520 goto out; 4521 } 4522 4523 /* Need to do basic initialisation of all rings first: */ 4524 for_each_engine(engine, dev_priv) { 4525 ret = engine->init_hw(engine); 4526 if (ret) 4527 goto out; 4528 } 4529 4530 intel_mocs_init_l3cc_table(dev); 4531 4532 /* We can't enable contexts until all firmware is loaded */ 4533 ret = intel_guc_setup(dev); 4534 if (ret) 4535 goto out; 4536 4537 out: 4538 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4539 return ret; 4540 } 4541 4542 bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) 4543 { 4544 if (INTEL_INFO(dev_priv)->gen < 6) 4545 return false; 4546 4547 /* TODO: make semaphores and Execlists play nicely together */ 4548 if (i915.enable_execlists) 4549 return false; 4550 4551 if (value >= 0) 4552 return value; 4553 4554 #ifdef CONFIG_INTEL_IOMMU 4555 /* Enable semaphores on SNB when IO remapping is off */ 4556 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped) 4557 return false; 4558 #endif 4559 4560 return true; 4561 } 4562 4563 int i915_gem_init(struct drm_device *dev) 4564 { 4565 struct drm_i915_private *dev_priv = to_i915(dev); 4566 int ret; 4567 4568 mutex_lock(&dev->struct_mutex); 4569 4570 if (!i915.enable_execlists) { 4571 dev_priv->gt.cleanup_engine = intel_engine_cleanup; 4572 } else { 4573 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; 4574 } 4575 4576 /* This is just a security blanket to placate dragons. 4577 * On some systems, we very sporadically observe that the first TLBs 4578 * used by the CS may be stale, despite us poking the TLB reset. If 4579 * we hold the forcewake during initialisation these problems 4580 * just magically go away. 4581 */ 4582 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 4583 4584 i915_gem_init_userptr(dev_priv); 4585 4586 ret = i915_gem_init_ggtt(dev_priv); 4587 if (ret) 4588 goto out_unlock; 4589 4590 ret = i915_gem_context_init(dev); 4591 if (ret) 4592 goto out_unlock; 4593 4594 ret = intel_engines_init(dev); 4595 if (ret) 4596 goto out_unlock; 4597 4598 ret = i915_gem_init_hw(dev); 4599 if (ret == -EIO) { 4600 /* Allow engine initialisation to fail by marking the GPU as 4601 * wedged. But we only want to do this where the GPU is angry, 4602 * for all other failure, such as an allocation failure, bail. 
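		 * (Once wedged, subsequent GPU waits and execbufs fail with
		 * -EIO instead of hanging, so the driver can still finish
		 * loading in a degraded, display-only mode.)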
4603 */ 4604 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4605 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4606 ret = 0; 4607 } 4608 4609 out_unlock: 4610 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 4611 mutex_unlock(&dev->struct_mutex); 4612 4613 return ret; 4614 } 4615 4616 void 4617 i915_gem_cleanup_engines(struct drm_device *dev) 4618 { 4619 struct drm_i915_private *dev_priv = to_i915(dev); 4620 struct intel_engine_cs *engine; 4621 4622 for_each_engine(engine, dev_priv) 4623 dev_priv->gt.cleanup_engine(engine); 4624 } 4625 4626 static void 4627 init_engine_lists(struct intel_engine_cs *engine) 4628 { 4629 INIT_LIST_HEAD(&engine->request_list); 4630 } 4631 4632 void 4633 i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4634 { 4635 struct drm_device *dev = &dev_priv->drm; 4636 4637 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 4638 !IS_CHERRYVIEW(dev_priv)) 4639 dev_priv->num_fence_regs = 32; 4640 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) || 4641 IS_I945GM(dev_priv) || IS_G33(dev_priv)) 4642 dev_priv->num_fence_regs = 16; 4643 else 4644 dev_priv->num_fence_regs = 8; 4645 4646 if (intel_vgpu_active(dev_priv)) 4647 dev_priv->num_fence_regs = 4648 I915_READ(vgtif_reg(avail_rs.fence_num)); 4649 4650 /* Initialize fence registers to zero */ 4651 i915_gem_restore_fences(dev); 4652 4653 i915_gem_detect_bit_6_swizzle(dev); 4654 } 4655 4656 void 4657 i915_gem_load_init(struct drm_device *dev) 4658 { 4659 struct drm_i915_private *dev_priv = to_i915(dev); 4660 int i; 4661 4662 #if 0 4663 dev_priv->objects = 4664 kmem_cache_create("i915_gem_object", 4665 sizeof(struct drm_i915_gem_object), 0, 4666 SLAB_HWCACHE_ALIGN, 4667 NULL); 4668 dev_priv->vmas = 4669 kmem_cache_create("i915_gem_vma", 4670 sizeof(struct i915_vma), 0, 4671 SLAB_HWCACHE_ALIGN, 4672 NULL); 4673 dev_priv->requests = 4674 kmem_cache_create("i915_gem_request", 4675 sizeof(struct drm_i915_gem_request), 0, 4676 SLAB_HWCACHE_ALIGN | 4677 SLAB_RECLAIM_ACCOUNT | 4678 SLAB_DESTROY_BY_RCU, 4679 NULL); 4680 #endif 4681 4682 INIT_LIST_HEAD(&dev_priv->context_list); 4683 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4684 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4685 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4686 for (i = 0; i < I915_NUM_ENGINES; i++) 4687 init_engine_lists(&dev_priv->engine[i]); 4688 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4689 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4690 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 4691 i915_gem_retire_work_handler); 4692 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 4693 i915_gem_idle_work_handler); 4694 init_waitqueue_head(&dev_priv->gpu_error.wait_queue); 4695 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 4696 4697 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4698 4699 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4700 4701 init_waitqueue_head(&dev_priv->pending_flip_queue); 4702 4703 dev_priv->mm.interruptible = true; 4704 4705 lockinit(&dev_priv->fb_tracking.lock, "drmftl", 0, 0); 4706 } 4707 4708 void i915_gem_load_cleanup(struct drm_device *dev) 4709 { 4710 #if 0 4711 struct drm_i915_private *dev_priv = to_i915(dev); 4712 4713 kmem_cache_destroy(dev_priv->requests); 4714 kmem_cache_destroy(dev_priv->vmas); 4715 kmem_cache_destroy(dev_priv->objects); 4716 #endif 4717 4718 /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ 4719 rcu_barrier(); 4720 } 4721 4722 int i915_gem_freeze_late(struct drm_i915_private *dev_priv) 4723 { 4724 struct 
drm_i915_gem_object *obj; 4725 4726 /* Called just before we write the hibernation image. 4727 * 4728 * We need to update the domain tracking to reflect that the CPU 4729 * will be accessing all the pages to create and restore from the 4730 * hibernation, and so upon restoration those pages will be in the 4731 * CPU domain. 4732 * 4733 * To make sure the hibernation image contains the latest state, 4734 * we update that state just before writing out the image. 4735 */ 4736 4737 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { 4738 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4739 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4740 } 4741 4742 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 4743 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4744 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4745 } 4746 4747 return 0; 4748 } 4749 4750 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4751 { 4752 struct drm_i915_file_private *file_priv = file->driver_priv; 4753 struct drm_i915_gem_request *request; 4754 4755 /* Clean up our request list when the client is going away, so that 4756 * later retire_requests won't dereference our soon-to-be-gone 4757 * file_priv. 4758 */ 4759 lockmgr(&file_priv->mm.lock, LK_EXCLUSIVE); 4760 list_for_each_entry(request, &file_priv->mm.request_list, client_list) 4761 request->file_priv = NULL; 4762 lockmgr(&file_priv->mm.lock, LK_RELEASE); 4763 4764 if (!list_empty(&file_priv->rps.link)) { 4765 lockmgr(&to_i915(dev)->rps.client_lock, LK_EXCLUSIVE); 4766 list_del(&file_priv->rps.link); 4767 lockmgr(&to_i915(dev)->rps.client_lock, LK_RELEASE); 4768 } 4769 } 4770 4771 int 4772 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 4773 vm_ooffset_t foff, struct ucred *cred, u_short *color) 4774 { 4775 *color = 0; /* XXXKIB */ 4776 return (0); 4777 } 4778 4779 void 4780 i915_gem_pager_dtor(void *handle) 4781 { 4782 struct drm_gem_object *obj = handle; 4783 struct drm_device *dev = obj->dev; 4784 4785 mutex_lock(&dev->struct_mutex); 4786 drm_gem_free_mmap_offset(obj); 4787 i915_gem_release_mmap(to_intel_bo(obj)); 4788 drm_gem_object_unreference(obj); 4789 mutex_unlock(&dev->struct_mutex); 4790 } 4791 4792 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 4793 { 4794 struct drm_i915_file_private *file_priv; 4795 int ret; 4796 4797 DRM_DEBUG_DRIVER("\n"); 4798 4799 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 4800 if (!file_priv) 4801 return -ENOMEM; 4802 4803 file->driver_priv = file_priv; 4804 file_priv->dev_priv = to_i915(dev); 4805 file_priv->file = file; 4806 INIT_LIST_HEAD(&file_priv->rps.link); 4807 4808 lockinit(&file_priv->mm.lock, "i915_priv", 0, 0); 4809 INIT_LIST_HEAD(&file_priv->mm.request_list); 4810 4811 file_priv->bsd_engine = -1; 4812 4813 ret = i915_gem_context_open(dev, file); 4814 if (ret) 4815 kfree(file_priv); 4816 4817 return ret; 4818 } 4819 4820 /** 4821 * i915_gem_track_fb - update frontbuffer tracking 4822 * @old: current GEM buffer for the frontbuffer slots 4823 * @new: new GEM buffer for the frontbuffer slots 4824 * @frontbuffer_bits: bitmask of frontbuffer slots 4825 * 4826 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 4827 * from @old and setting them in @new. Both @old and @new can be NULL. 
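 *
 * Illustrative call pattern (sketch only; the actual callers live in the
 * display/frontbuffer code):
 *
 *	i915_gem_track_fb(old_obj, new_obj,
 *			  INTEL_FRONTBUFFER_PRIMARY(pipe));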
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

/* All the new VM stuff */
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
			struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = to_i915(o->base.dev);
	struct i915_vma *vma;

	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);

	list_for_each_entry(vma, &o->vma_list, obj_link) {
		if (i915_vma_is_ggtt(vma) &&
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
			continue;
		if (vma->vm == vm)
			return vma->node.start;
	}

	WARN(1, "%s vma for this object not found.\n",
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
	return -1;
}

u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
				  const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;

	list_for_each_entry(vma, &o->vma_list, obj_link)
		if (i915_vma_is_ggtt(vma) &&
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
			return vma->node.start;

	WARN(1, "global vma for this object not found. 
(view=%u)\n", view->type); 4886 return -1; 4887 } 4888 4889 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 4890 struct i915_address_space *vm) 4891 { 4892 struct i915_vma *vma; 4893 4894 list_for_each_entry(vma, &o->vma_list, obj_link) { 4895 if (i915_vma_is_ggtt(vma) && 4896 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 4897 continue; 4898 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 4899 return true; 4900 } 4901 4902 return false; 4903 } 4904 4905 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 4906 const struct i915_ggtt_view *view) 4907 { 4908 struct i915_vma *vma; 4909 4910 list_for_each_entry(vma, &o->vma_list, obj_link) 4911 if (i915_vma_is_ggtt(vma) && 4912 i915_ggtt_view_equal(&vma->ggtt_view, view) && 4913 drm_mm_node_allocated(&vma->node)) 4914 return true; 4915 4916 return false; 4917 } 4918 4919 unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) 4920 { 4921 struct i915_vma *vma; 4922 4923 GEM_BUG_ON(list_empty(&o->vma_list)); 4924 4925 list_for_each_entry(vma, &o->vma_list, obj_link) { 4926 if (i915_vma_is_ggtt(vma) && 4927 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 4928 return vma->node.size; 4929 } 4930 4931 return 0; 4932 } 4933 4934 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 4935 { 4936 struct i915_vma *vma; 4937 list_for_each_entry(vma, &obj->vma_list, obj_link) 4938 if (i915_vma_is_pinned(vma)) 4939 return true; 4940 4941 return false; 4942 } 4943 4944 /* Like i915_gem_object_get_page(), but mark the returned page dirty */ 4945 struct page * 4946 i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) 4947 { 4948 struct page *page; 4949 4950 /* Only default objects have per-page dirty tracking */ 4951 if (WARN_ON(!i915_gem_object_has_struct_page(obj))) 4952 return NULL; 4953 4954 page = i915_gem_object_get_page(obj, n); 4955 set_page_dirty(page); 4956 return page; 4957 } 4958 4959 /* Allocate a new GEM object and fill it with the supplied data */ 4960 struct drm_i915_gem_object * 4961 i915_gem_object_create_from_data(struct drm_device *dev, 4962 const void *data, size_t size) 4963 { 4964 struct drm_i915_gem_object *obj; 4965 struct sg_table *sg; 4966 size_t bytes; 4967 int ret; 4968 4969 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE)); 4970 if (IS_ERR(obj)) 4971 return obj; 4972 4973 ret = i915_gem_object_set_to_cpu_domain(obj, true); 4974 if (ret) 4975 goto fail; 4976 4977 ret = i915_gem_object_get_pages(obj); 4978 if (ret) 4979 goto fail; 4980 4981 i915_gem_object_pin_pages(obj); 4982 sg = obj->pages; 4983 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 4984 obj->dirty = 1; /* Backing store is now out of date */ 4985 i915_gem_object_unpin_pages(obj); 4986 4987 if (WARN_ON(bytes != size)) { 4988 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 4989 ret = -EFAULT; 4990 goto fail; 4991 } 4992 4993 return obj; 4994 4995 fail: 4996 i915_gem_object_put(obj); 4997 return ERR_PTR(ret); 4998 } 4999
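#if 0
/*
 * Illustrative sketch only (not compiled): typical use of
 * i915_gem_object_create_from_data() to wrap a data blob in a GEM
 * object, e.g. when uploading firmware. The "fw" parameter follows the
 * standard <linux/firmware.h> struct firmware layout; the function name
 * below is an assumption made purely for this example.
 */
static struct drm_i915_gem_object *
example_wrap_firmware(struct drm_device *dev, const struct firmware *fw)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
	if (IS_ERR(obj))
		return obj;

	/* The object now holds a CPU-coherent copy of fw->data. */
	return obj;
}
#endif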