/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <machine/md_var.h>

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
						   bool force);
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly);
static void
i915_gem_object_retire(struct drm_i915_gem_object *obj);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static unsigned long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
145 */ 146 ret = wait_event_interruptible_timeout(error->reset_queue, 147 EXIT_COND, 148 10*HZ); 149 if (ret == 0) { 150 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 151 return -EIO; 152 } else if (ret < 0) { 153 return ret; 154 } 155 #undef EXIT_COND 156 157 return 0; 158 } 159 160 int i915_mutex_lock_interruptible(struct drm_device *dev) 161 { 162 struct drm_i915_private *dev_priv = dev->dev_private; 163 int ret; 164 165 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 166 if (ret) 167 return ret; 168 169 ret = mutex_lock_interruptible(&dev->struct_mutex); 170 if (ret) 171 return ret; 172 173 WARN_ON(i915_verify_lists(dev)); 174 return 0; 175 } 176 177 static inline bool 178 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 179 { 180 return i915_gem_obj_bound_any(obj) && !obj->active; 181 } 182 183 int 184 i915_gem_init_ioctl(struct drm_device *dev, void *data, 185 struct drm_file *file) 186 { 187 struct drm_i915_private *dev_priv = dev->dev_private; 188 struct drm_i915_gem_init *args = data; 189 190 if (drm_core_check_feature(dev, DRIVER_MODESET)) 191 return -ENODEV; 192 193 if (args->gtt_start >= args->gtt_end || 194 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 195 return -EINVAL; 196 197 /* GEM with user mode setting was never supported on ilk and later. */ 198 if (INTEL_INFO(dev)->gen >= 5) 199 return -ENODEV; 200 201 mutex_lock(&dev->struct_mutex); 202 kprintf("INITGLOBALGTT GTT_START %016jx\n", (uintmax_t)args->gtt_start); 203 i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end, 204 args->gtt_end); 205 dev_priv->gtt.mappable_end = args->gtt_end; 206 mutex_unlock(&dev->struct_mutex); 207 208 return 0; 209 } 210 211 int 212 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 213 struct drm_file *file) 214 { 215 struct drm_i915_private *dev_priv = dev->dev_private; 216 struct drm_i915_gem_get_aperture *args = data; 217 struct drm_i915_gem_object *obj; 218 size_t pinned; 219 220 pinned = 0; 221 mutex_lock(&dev->struct_mutex); 222 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 223 if (i915_gem_obj_is_pinned(obj)) 224 pinned += i915_gem_obj_ggtt_size(obj); 225 mutex_unlock(&dev->struct_mutex); 226 227 args->aper_size = dev_priv->gtt.base.total; 228 args->aper_available_size = args->aper_size - pinned; 229 230 return 0; 231 } 232 233 static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) 234 { 235 drm_dma_handle_t *phys = obj->phys_handle; 236 237 if (!phys) 238 return; 239 240 if (obj->madv == I915_MADV_WILLNEED) { 241 struct vm_object *mapping = obj->base.vm_obj; 242 char *vaddr = phys->vaddr; 243 int i; 244 245 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 246 struct vm_page *page = shmem_read_mapping_page(mapping, i); 247 if (!IS_ERR(page)) { 248 char *dst = kmap_atomic(page); 249 memcpy(dst, vaddr, PAGE_SIZE); 250 drm_clflush_virt_range(dst, PAGE_SIZE); 251 kunmap_atomic(dst); 252 253 set_page_dirty(page); 254 mark_page_accessed(page); 255 #if 0 256 page_cache_release(page); 257 #endif 258 } 259 vaddr += PAGE_SIZE; 260 } 261 i915_gem_chipset_flush(obj->base.dev); 262 } 263 264 #ifdef CONFIG_X86 265 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 266 #endif 267 drm_pci_free(obj->base.dev, phys); 268 obj->phys_handle = NULL; 269 } 270 271 int 272 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, 273 int align) 274 { 275 drm_dma_handle_t *phys; 276 struct vm_object *mapping; 277 char *vaddr; 278 int i; 279 280 if (obj->phys_handle) { 281 if ((unsigned 
long)obj->phys_handle->vaddr & (align -1)) 282 return -EBUSY; 283 284 return 0; 285 } 286 287 if (obj->madv != I915_MADV_WILLNEED) 288 return -EFAULT; 289 290 #if 0 291 if (obj->base.filp == NULL) 292 return -EINVAL; 293 #endif 294 295 /* create a new object */ 296 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); 297 if (!phys) 298 return -ENOMEM; 299 300 vaddr = phys->vaddr; 301 #ifdef CONFIG_X86 302 set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); 303 #endif 304 mapping = obj->base.vm_obj; 305 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { 306 struct vm_page *page; 307 char *src; 308 309 page = shmem_read_mapping_page(mapping, i); 310 if (IS_ERR(page)) { 311 #ifdef CONFIG_X86 312 set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); 313 #endif 314 drm_pci_free(obj->base.dev, phys); 315 return PTR_ERR(page); 316 } 317 318 src = kmap_atomic(page); 319 memcpy(vaddr, src, PAGE_SIZE); 320 kunmap_atomic(src); 321 322 mark_page_accessed(page); 323 #if 0 324 page_cache_release(page); 325 #endif 326 327 vaddr += PAGE_SIZE; 328 } 329 330 obj->phys_handle = phys; 331 return 0; 332 } 333 334 static int 335 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, 336 struct drm_i915_gem_pwrite *args, 337 struct drm_file *file_priv) 338 { 339 struct drm_device *dev = obj->base.dev; 340 void *vaddr = (char *)obj->phys_handle->vaddr + args->offset; 341 char __user *user_data = to_user_ptr(args->data_ptr); 342 343 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 344 unsigned long unwritten; 345 346 /* The physical object once assigned is fixed for the lifetime 347 * of the obj, so we can safely drop the lock and continue 348 * to access vaddr. 349 */ 350 mutex_unlock(&dev->struct_mutex); 351 unwritten = copy_from_user(vaddr, user_data, args->size); 352 mutex_lock(&dev->struct_mutex); 353 if (unwritten) 354 return -EFAULT; 355 } 356 357 i915_gem_chipset_flush(dev); 358 return 0; 359 } 360 361 void *i915_gem_object_alloc(struct drm_device *dev) 362 { 363 return kmalloc(sizeof(struct drm_i915_gem_object), 364 M_DRM, M_WAITOK | M_ZERO); 365 } 366 367 void i915_gem_object_free(struct drm_i915_gem_object *obj) 368 { 369 kfree(obj); 370 } 371 372 static int 373 i915_gem_create(struct drm_file *file, 374 struct drm_device *dev, 375 uint64_t size, 376 uint32_t *handle_p) 377 { 378 struct drm_i915_gem_object *obj; 379 int ret; 380 u32 handle; 381 382 size = roundup(size, PAGE_SIZE); 383 if (size == 0) 384 return -EINVAL; 385 386 /* Allocate the new object */ 387 obj = i915_gem_alloc_object(dev, size); 388 if (obj == NULL) 389 return -ENOMEM; 390 391 ret = drm_gem_handle_create(file, &obj->base, &handle); 392 /* drop reference from allocate - handle holds it now */ 393 drm_gem_object_unreference_unlocked(&obj->base); 394 if (ret) 395 return ret; 396 397 *handle_p = handle; 398 return 0; 399 } 400 401 int 402 i915_gem_dumb_create(struct drm_file *file, 403 struct drm_device *dev, 404 struct drm_mode_create_dumb *args) 405 { 406 /* have to work out size/pitch and return them */ 407 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); 408 args->size = args->pitch * args->height; 409 return i915_gem_create(file, dev, 410 args->size, &args->handle); 411 } 412 413 /** 414 * Creates a new mm object and returns a handle to it. 
415 */ 416 int 417 i915_gem_create_ioctl(struct drm_device *dev, void *data, 418 struct drm_file *file) 419 { 420 struct drm_i915_gem_create *args = data; 421 422 return i915_gem_create(file, dev, 423 args->size, &args->handle); 424 } 425 426 static inline int 427 __copy_to_user_swizzled(char __user *cpu_vaddr, 428 const char *gpu_vaddr, int gpu_offset, 429 int length) 430 { 431 int ret, cpu_offset = 0; 432 433 while (length > 0) { 434 int cacheline_end = ALIGN(gpu_offset + 1, 64); 435 int this_length = min(cacheline_end - gpu_offset, length); 436 int swizzled_gpu_offset = gpu_offset ^ 64; 437 438 ret = __copy_to_user(cpu_vaddr + cpu_offset, 439 gpu_vaddr + swizzled_gpu_offset, 440 this_length); 441 if (ret) 442 return ret + length; 443 444 cpu_offset += this_length; 445 gpu_offset += this_length; 446 length -= this_length; 447 } 448 449 return 0; 450 } 451 452 static inline int 453 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 454 const char __user *cpu_vaddr, 455 int length) 456 { 457 int ret, cpu_offset = 0; 458 459 while (length > 0) { 460 int cacheline_end = ALIGN(gpu_offset + 1, 64); 461 int this_length = min(cacheline_end - gpu_offset, length); 462 int swizzled_gpu_offset = gpu_offset ^ 64; 463 464 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 465 cpu_vaddr + cpu_offset, 466 this_length); 467 if (ret) 468 return ret + length; 469 470 cpu_offset += this_length; 471 gpu_offset += this_length; 472 length -= this_length; 473 } 474 475 return 0; 476 } 477 478 /* 479 * Pins the specified object's pages and synchronizes the object with 480 * GPU accesses. Sets needs_clflush to non-zero if the caller should 481 * flush the object from the CPU cache. 482 */ 483 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, 484 int *needs_clflush) 485 { 486 int ret; 487 488 *needs_clflush = 0; 489 490 #if 0 491 if (!obj->base.filp) 492 return -EINVAL; 493 #endif 494 495 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 496 /* If we're not in the cpu read domain, set ourself into the gtt 497 * read domain and manually flush cachelines (if required). This 498 * optimizes for the case when the gpu will dirty the data 499 * anyway again before the next pread happens. */ 500 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, 501 obj->cache_level); 502 ret = i915_gem_object_wait_rendering(obj, true); 503 if (ret) 504 return ret; 505 506 i915_gem_object_retire(obj); 507 } 508 509 ret = i915_gem_object_get_pages(obj); 510 if (ret) 511 return ret; 512 513 i915_gem_object_pin_pages(obj); 514 515 return ret; 516 } 517 518 /* Per-page copy function for the shmem pread fastpath. 519 * Flushes invalid cachelines before reading the target if 520 * needs_clflush is set. */ 521 static int 522 shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length, 523 char __user *user_data, 524 bool page_do_bit17_swizzling, bool needs_clflush) 525 { 526 char *vaddr; 527 int ret; 528 529 if (unlikely(page_do_bit17_swizzling)) 530 return -EINVAL; 531 532 vaddr = kmap_atomic(page); 533 if (needs_clflush) 534 drm_clflush_virt_range(vaddr + shmem_page_offset, 535 page_length); 536 ret = __copy_to_user_inatomic(user_data, 537 vaddr + shmem_page_offset, 538 page_length); 539 kunmap_atomic(vaddr); 540 541 return ret ? 
-EFAULT : 0; 542 } 543 544 static void 545 shmem_clflush_swizzled_range(char *addr, unsigned long length, 546 bool swizzled) 547 { 548 if (unlikely(swizzled)) { 549 unsigned long start = (unsigned long) addr; 550 unsigned long end = (unsigned long) addr + length; 551 552 /* For swizzling simply ensure that we always flush both 553 * channels. Lame, but simple and it works. Swizzled 554 * pwrite/pread is far from a hotpath - current userspace 555 * doesn't use it at all. */ 556 start = round_down(start, 128); 557 end = round_up(end, 128); 558 559 drm_clflush_virt_range((void *)start, end - start); 560 } else { 561 drm_clflush_virt_range(addr, length); 562 } 563 564 } 565 566 /* Only difference to the fast-path function is that this can handle bit17 567 * and uses non-atomic copy and kmap functions. */ 568 static int 569 shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length, 570 char __user *user_data, 571 bool page_do_bit17_swizzling, bool needs_clflush) 572 { 573 char *vaddr; 574 int ret; 575 576 vaddr = kmap(page); 577 if (needs_clflush) 578 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 579 page_length, 580 page_do_bit17_swizzling); 581 582 if (page_do_bit17_swizzling) 583 ret = __copy_to_user_swizzled(user_data, 584 vaddr, shmem_page_offset, 585 page_length); 586 else 587 ret = __copy_to_user(user_data, 588 vaddr + shmem_page_offset, 589 page_length); 590 kunmap(page); 591 592 return ret ? - EFAULT : 0; 593 } 594 595 static int 596 i915_gem_shmem_pread(struct drm_device *dev, 597 struct drm_i915_gem_object *obj, 598 struct drm_i915_gem_pread *args, 599 struct drm_file *file) 600 { 601 char __user *user_data; 602 ssize_t remain; 603 loff_t offset; 604 int shmem_page_offset, page_length, ret = 0; 605 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 606 int prefaulted = 0; 607 int needs_clflush = 0; 608 int i; 609 610 user_data = to_user_ptr(args->data_ptr); 611 remain = args->size; 612 613 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 614 615 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); 616 if (ret) 617 return ret; 618 619 offset = args->offset; 620 621 for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) { 622 struct vm_page *page = obj->pages[i]; 623 624 if (remain <= 0) 625 break; 626 627 /* Operation in this page 628 * 629 * shmem_page_offset = offset within page in shmem file 630 * page_length = bytes to copy for this page 631 */ 632 shmem_page_offset = offset_in_page(offset); 633 page_length = remain; 634 if ((shmem_page_offset + page_length) > PAGE_SIZE) 635 page_length = PAGE_SIZE - shmem_page_offset; 636 637 page_do_bit17_swizzling = obj_do_bit17_swizzling && 638 (page_to_phys(page) & (1 << 17)) != 0; 639 640 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 641 user_data, page_do_bit17_swizzling, 642 needs_clflush); 643 if (ret == 0) 644 goto next_page; 645 646 mutex_unlock(&dev->struct_mutex); 647 648 if (likely(!i915.prefault_disable) && !prefaulted) { 649 ret = fault_in_multipages_writeable(user_data, remain); 650 /* Userspace is tricking us, but we've already clobbered 651 * its pages with the prefault and promised to write the 652 * data up to the first fault. Hence ignore any errors 653 * and just continue. 
*/ 654 (void)ret; 655 prefaulted = 1; 656 } 657 658 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 659 user_data, page_do_bit17_swizzling, 660 needs_clflush); 661 662 mutex_lock(&dev->struct_mutex); 663 664 if (ret) 665 goto out; 666 667 next_page: 668 remain -= page_length; 669 user_data += page_length; 670 offset += page_length; 671 } 672 673 out: 674 i915_gem_object_unpin_pages(obj); 675 676 return ret; 677 } 678 679 /** 680 * Reads data from the object referenced by handle. 681 * 682 * On error, the contents of *data are undefined. 683 */ 684 int 685 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 686 struct drm_file *file) 687 { 688 struct drm_i915_gem_pread *args = data; 689 struct drm_i915_gem_object *obj; 690 int ret = 0; 691 692 if (args->size == 0) 693 return 0; 694 695 ret = i915_mutex_lock_interruptible(dev); 696 if (ret) 697 return ret; 698 699 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 700 if (&obj->base == NULL) { 701 ret = -ENOENT; 702 goto unlock; 703 } 704 705 /* Bounds check source. */ 706 if (args->offset > obj->base.size || 707 args->size > obj->base.size - args->offset) { 708 ret = -EINVAL; 709 goto out; 710 } 711 712 trace_i915_gem_object_pread(obj, args->offset, args->size); 713 714 ret = i915_gem_shmem_pread(dev, obj, args, file); 715 716 out: 717 drm_gem_object_unreference(&obj->base); 718 unlock: 719 mutex_unlock(&dev->struct_mutex); 720 return ret; 721 } 722 723 /* This is the fast write path which cannot handle 724 * page faults in the source data 725 */ 726 727 #if 0 /* XXX: buggy on core2 machines */ 728 static inline int 729 fast_user_write(struct io_mapping *mapping, 730 loff_t page_base, int page_offset, 731 char __user *user_data, 732 int length) 733 { 734 void __iomem *vaddr_atomic; 735 void *vaddr; 736 unsigned long unwritten; 737 738 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 739 /* We can use the cpu mem copy function because this is X86. */ 740 vaddr = (char __force*)vaddr_atomic + page_offset; 741 unwritten = __copy_from_user_inatomic_nocache(vaddr, 742 user_data, length); 743 io_mapping_unmap_atomic(vaddr_atomic); 744 return unwritten; 745 } 746 #endif 747 748 static int 749 i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj, 750 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file) 751 { 752 vm_offset_t mkva; 753 int ret; 754 755 /* 756 * Pass the unaligned physical address and size to pmap_mapdev_attr() 757 * so it can properly calculate whether an extra page needs to be 758 * mapped or not to cover the requested range. The function will 759 * add the page offset into the returned mkva for us. 760 */ 761 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + 762 i915_gem_obj_ggtt_offset(obj) + offset, size, PAT_WRITE_COMBINING); 763 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size); 764 pmap_unmapdev(mkva, size); 765 return ret; 766 } 767 768 /** 769 * This is the fast pwrite path, where we copy the data directly from the 770 * user into the GTT, uncached. 
771 */ 772 static int 773 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 774 struct drm_i915_gem_object *obj, 775 struct drm_i915_gem_pwrite *args, 776 struct drm_file *file) 777 { 778 ssize_t remain; 779 loff_t offset, page_base; 780 char __user *user_data; 781 int page_offset, page_length, ret; 782 783 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); 784 if (ret) 785 goto out; 786 787 ret = i915_gem_object_set_to_gtt_domain(obj, true); 788 if (ret) 789 goto out_unpin; 790 791 ret = i915_gem_object_put_fence(obj); 792 if (ret) 793 goto out_unpin; 794 795 user_data = to_user_ptr(args->data_ptr); 796 remain = args->size; 797 798 offset = i915_gem_obj_ggtt_offset(obj) + args->offset; 799 800 while (remain > 0) { 801 /* Operation in this page 802 * 803 * page_base = page offset within aperture 804 * page_offset = offset within page 805 * page_length = bytes to copy for this page 806 */ 807 page_base = offset & ~PAGE_MASK; 808 page_offset = offset_in_page(offset); 809 page_length = remain; 810 if ((page_offset + remain) > PAGE_SIZE) 811 page_length = PAGE_SIZE - page_offset; 812 813 /* If we get a fault while copying data, then (presumably) our 814 * source page isn't available. Return the error and we'll 815 * retry in the slow path. 816 */ 817 #if 0 818 if (fast_user_write(dev_priv->gtt.mappable, page_base, 819 page_offset, user_data, page_length)) { 820 #else 821 if (i915_gem_gtt_write(dev, obj, args->data_ptr, args->size, args->offset, file)) { 822 #endif 823 ret = -EFAULT; 824 goto out_unpin; 825 } 826 827 remain -= page_length; 828 user_data += page_length; 829 offset += page_length; 830 } 831 832 out_unpin: 833 i915_gem_object_ggtt_unpin(obj); 834 out: 835 return ret; 836 } 837 838 /* Per-page copy function for the shmem pwrite fastpath. 839 * Flushes invalid cachelines before writing to the target if 840 * needs_clflush_before is set and flushes out any written cachelines after 841 * writing if needs_clflush is set. */ 842 static int 843 shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length, 844 char __user *user_data, 845 bool page_do_bit17_swizzling, 846 bool needs_clflush_before, 847 bool needs_clflush_after) 848 { 849 char *vaddr; 850 int ret; 851 852 if (unlikely(page_do_bit17_swizzling)) 853 return -EINVAL; 854 855 vaddr = kmap_atomic(page); 856 if (needs_clflush_before) 857 drm_clflush_virt_range(vaddr + shmem_page_offset, 858 page_length); 859 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, 860 user_data, page_length); 861 if (needs_clflush_after) 862 drm_clflush_virt_range(vaddr + shmem_page_offset, 863 page_length); 864 kunmap_atomic(vaddr); 865 866 return ret ? -EFAULT : 0; 867 } 868 869 /* Only difference to the fast-path function is that this can handle bit17 870 * and uses non-atomic copy and kmap functions. 
 */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;

		i915_gem_object_retire(obj);
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	VM_OBJECT_LOCK(obj->base.vm_obj);
	vm_object_pip_add(obj->base.vm_obj, 1);
	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page = obj->pages[i];
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page.
*/ 977 partial_cacheline_write = needs_clflush_before && 978 ((shmem_page_offset | page_length) 979 & (cpu_clflush_line_size - 1)); 980 981 page_do_bit17_swizzling = obj_do_bit17_swizzling && 982 (page_to_phys(page) & (1 << 17)) != 0; 983 984 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 985 user_data, page_do_bit17_swizzling, 986 partial_cacheline_write, 987 needs_clflush_after); 988 if (ret == 0) 989 goto next_page; 990 991 hit_slowpath = 1; 992 mutex_unlock(&dev->struct_mutex); 993 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 994 user_data, page_do_bit17_swizzling, 995 partial_cacheline_write, 996 needs_clflush_after); 997 998 mutex_lock(&dev->struct_mutex); 999 1000 if (ret) 1001 goto out; 1002 1003 next_page: 1004 remain -= page_length; 1005 user_data += page_length; 1006 offset += page_length; 1007 } 1008 vm_object_pip_wakeup(obj->base.vm_obj); 1009 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1010 1011 out: 1012 i915_gem_object_unpin_pages(obj); 1013 1014 if (hit_slowpath) { 1015 /* 1016 * Fixup: Flush cpu caches in case we didn't flush the dirty 1017 * cachelines in-line while writing and the object moved 1018 * out of the cpu write domain while we've dropped the lock. 1019 */ 1020 if (!needs_clflush_after && 1021 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1022 if (i915_gem_clflush_object(obj, obj->pin_display)) 1023 i915_gem_chipset_flush(dev); 1024 } 1025 } 1026 1027 if (needs_clflush_after) 1028 i915_gem_chipset_flush(dev); 1029 1030 return ret; 1031 } 1032 1033 /** 1034 * Writes data to the object referenced by handle. 1035 * 1036 * On error, the contents of the buffer that were to be modified are undefined. 1037 */ 1038 int 1039 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1040 struct drm_file *file) 1041 { 1042 struct drm_i915_gem_pwrite *args = data; 1043 struct drm_i915_gem_object *obj; 1044 int ret; 1045 1046 if (args->size == 0) 1047 return 0; 1048 1049 if (likely(!i915.prefault_disable)) { 1050 ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), 1051 args->size); 1052 if (ret) 1053 return -EFAULT; 1054 } 1055 1056 ret = i915_mutex_lock_interruptible(dev); 1057 if (ret) 1058 return ret; 1059 1060 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1061 if (&obj->base == NULL) { 1062 ret = -ENOENT; 1063 goto unlock; 1064 } 1065 1066 /* Bounds check destination. */ 1067 if (args->offset > obj->base.size || 1068 args->size > obj->base.size - args->offset) { 1069 ret = -EINVAL; 1070 goto out; 1071 } 1072 1073 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1074 1075 ret = -EFAULT; 1076 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1077 * it would end up going through the fenced access, and we'll get 1078 * different detiling behavior between reading and writing. 1079 * pread/pwrite currently are reading and writing from the CPU 1080 * perspective, requiring manual detiling by the client. 1081 */ 1082 if (obj->phys_handle) { 1083 ret = i915_gem_phys_pwrite(obj, args, file); 1084 goto out; 1085 } 1086 1087 if (obj->tiling_mode == I915_TILING_NONE && 1088 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 1089 cpu_write_needs_clflush(obj)) { 1090 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1091 /* Note that the gtt paths might fail with non-page-backed user 1092 * pointers (e.g. gtt mappings when moving data between 1093 * textures). Fallback to the shmem path in that case. 
		 */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
{
	int ret;

	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_seqno)
		ret = i915_add_request(ring, NULL);

	return ret;
}

#if 0
static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

static bool can_wait_boost(struct drm_i915_file_private *file_priv)
{
	if (file_priv == NULL)
		return true;

	return !atomic_xchg(&file_priv->rps_wait_boost, true);
}
#endif

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @reset_counter: reset sequence associated with the given seqno
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
			unsigned reset_counter,
			bool interruptible,
			struct timespec *timeout,
			struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	WARN(dev_priv->pm.irqs_disabled, "IRQs disabled\n");

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies_timeout(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(ring, seqno);
	getrawmonotonic(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	 i915_reset_in_progress(&dev_priv->gpu_error) || \
	 reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
	do {
		if (interruptible)
			end = wait_event_interruptible_timeout(ring->irq_queue,
							       EXIT_COND,
							       timeout_jiffies);
		else
			end = wait_event_timeout(ring->irq_queue, EXIT_COND,
						 timeout_jiffies);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter))
			end = -EAGAIN;

		/* ... but upgrade the -EAGAIN to an -EIO if the gpu is truly
		 * gone. */
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	getrawmonotonic(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		struct timespec sleep_time = timespec_sub(now, before);
		*timeout = timespec_sub(*timeout, sleep_time);
		if (!timespec_valid(timeout)) /* i.e. negative time remains */
			set_normalized_timespec(timeout, 0, 0);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		return -ETIMEDOUT;	/* -ETIME on Linux */
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno,
			    atomic_read(&dev_priv->gpu_error.reset_counter),
			    interruptible, NULL, NULL);
}

static int
i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
				     struct intel_engine_cs *ring)
{
	if (!obj->active)
		return 0;

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 *
	 * Note that the last_write_seqno is always the earlier of
	 * the two (read/write) seqno, so if we have successfully waited,
	 * we know we have passed the last write.
	 */
	obj->last_write_seqno = 0;

	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_engine_cs *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct drm_i915_file_private *file_priv,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = obj->ring;
	unsigned reset_counter;
	u32 seqno;
	int ret;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
	mutex_unlock(&dev->struct_mutex);
	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
	mutex_lock(&dev->struct_mutex);
	if (ret)
		return ret;

	return i915_gem_object_wait_rendering__tail(obj, ring);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
1379 */ 1380 int 1381 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1382 struct drm_file *file) 1383 { 1384 struct drm_i915_gem_set_domain *args = data; 1385 struct drm_i915_gem_object *obj; 1386 uint32_t read_domains = args->read_domains; 1387 uint32_t write_domain = args->write_domain; 1388 int ret; 1389 1390 /* Only handle setting domains to types used by the CPU. */ 1391 if (write_domain & I915_GEM_GPU_DOMAINS) 1392 return -EINVAL; 1393 1394 if (read_domains & I915_GEM_GPU_DOMAINS) 1395 return -EINVAL; 1396 1397 /* Having something in the write domain implies it's in the read 1398 * domain, and only that read domain. Enforce that in the request. 1399 */ 1400 if (write_domain != 0 && read_domains != write_domain) 1401 return -EINVAL; 1402 1403 ret = i915_mutex_lock_interruptible(dev); 1404 if (ret) 1405 return ret; 1406 1407 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1408 if (&obj->base == NULL) { 1409 ret = -ENOENT; 1410 goto unlock; 1411 } 1412 1413 /* Try to flush the object off the GPU without holding the lock. 1414 * We will repeat the flush holding the lock in the normal manner 1415 * to catch cases where we are gazumped. 1416 */ 1417 ret = i915_gem_object_wait_rendering__nonblocking(obj, 1418 file->driver_priv, 1419 !write_domain); 1420 if (ret) 1421 goto unref; 1422 1423 if (read_domains & I915_GEM_DOMAIN_GTT) { 1424 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1425 1426 /* Silently promote "you're not bound, there was nothing to do" 1427 * to success, since the client was just asking us to 1428 * make sure everything was done. 1429 */ 1430 if (ret == -EINVAL) 1431 ret = 0; 1432 } else { 1433 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1434 } 1435 1436 unref: 1437 drm_gem_object_unreference(&obj->base); 1438 unlock: 1439 mutex_unlock(&dev->struct_mutex); 1440 return ret; 1441 } 1442 1443 /** 1444 * Called when user space has done writes to this buffer 1445 */ 1446 int 1447 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1448 struct drm_file *file) 1449 { 1450 struct drm_i915_gem_sw_finish *args = data; 1451 struct drm_i915_gem_object *obj; 1452 int ret = 0; 1453 1454 ret = i915_mutex_lock_interruptible(dev); 1455 if (ret) 1456 return ret; 1457 1458 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1459 if (&obj->base == NULL) { 1460 ret = -ENOENT; 1461 goto unlock; 1462 } 1463 1464 /* Pinned buffers may be scanout, so flush the cache */ 1465 if (obj->pin_display) 1466 i915_gem_object_flush_cpu_write_domain(obj, true); 1467 1468 drm_gem_object_unreference(&obj->base); 1469 unlock: 1470 mutex_unlock(&dev->struct_mutex); 1471 return ret; 1472 } 1473 1474 /** 1475 * Maps the contents of an object, returning the address it is mapped 1476 * into. 1477 * 1478 * While the mapping holds a reference on the contents of the object, it doesn't 1479 * imply a ref on the object itself. 
1480 */ 1481 int 1482 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1483 struct drm_file *file) 1484 { 1485 struct drm_i915_gem_mmap *args = data; 1486 struct drm_gem_object *obj; 1487 unsigned long addr; 1488 struct proc *p = curproc; 1489 vm_map_t map = &p->p_vmspace->vm_map; 1490 vm_size_t size; 1491 int error = 0, rv; 1492 1493 obj = drm_gem_object_lookup(dev, file, args->handle); 1494 if (obj == NULL) 1495 return -ENOENT; 1496 1497 if (args->size == 0) 1498 goto out; 1499 1500 size = round_page(args->size); 1501 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 1502 error = -ENOMEM; 1503 goto out; 1504 } 1505 1506 /* 1507 * Call hint to ensure that NULL is not returned as a valid address 1508 * and to reduce vm_map traversals. XXX causes instability, use a 1509 * fixed low address as the start point instead to avoid the NULL 1510 * return issue. 1511 */ 1512 addr = PAGE_SIZE; 1513 1514 /* 1515 * Use 256KB alignment. It is unclear why this matters for a 1516 * virtual address but it appears to fix a number of application/X 1517 * crashes and kms console switching is much faster. 1518 */ 1519 vm_object_hold(obj->vm_obj); 1520 vm_object_reference_locked(obj->vm_obj); 1521 vm_object_drop(obj->vm_obj); 1522 1523 rv = vm_map_find(map, obj->vm_obj, NULL, 1524 args->offset, &addr, args->size, 1525 256 * 1024, /* align */ 1526 TRUE, /* fitit */ 1527 VM_MAPTYPE_NORMAL, /* maptype */ 1528 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 1529 VM_PROT_READ | VM_PROT_WRITE, /* max */ 1530 MAP_SHARED /* cow */); 1531 if (rv != KERN_SUCCESS) { 1532 vm_object_deallocate(obj->vm_obj); 1533 error = -vm_mmap_to_errno(rv); 1534 } else { 1535 args->addr_ptr = (uint64_t)addr; 1536 } 1537 out: 1538 drm_gem_object_unreference(obj); 1539 return (error); 1540 } 1541 1542 /** 1543 * i915_gem_fault - fault a page into the GTT 1544 * 1545 * vm_obj is locked on entry and expected to be locked on return. 1546 * 1547 * The vm_pager has placemarked the object with an anonymous memory page 1548 * which we must replace atomically to avoid races against concurrent faults 1549 * on the same page. XXX we currently are unable to do this atomically. 1550 * 1551 * If we are to return an error we should not touch the anonymous page, 1552 * the caller will deallocate it. 1553 * 1554 * XXX Most GEM calls appear to be interruptable, but we can't hard loop 1555 * in that case. Release all resources and wait 1 tick before retrying. 1556 * This is a huge problem which needs to be fixed by getting rid of most 1557 * of the interruptability. The linux code does not retry but does appear 1558 * to have some sort of mechanism (VM_FAULT_NOPAGE ?) for the higher level 1559 * to be able to retry. 1560 * 1561 * -- 1562 * 1563 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1564 * from userspace. The fault handler takes care of binding the object to 1565 * the GTT (if needed), allocating and programming a fence register (again, 1566 * only if needed based on whether the old reg is still valid or the object 1567 * is tiled) and inserting a new PTE into the faulting process. 1568 * 1569 * Note that the faulting process may involve evicting existing objects 1570 * from the GTT and/or fence registers to make room. So performance may 1571 * suffer if the GTT working set is large or there are few fence registers 1572 * left. 1573 * 1574 * vm_obj is locked on entry and expected to be locked on return. The VM 1575 * pager has placed an anonymous memory page at (obj,offset) which we have 1576 * to replace. 
 */
int i915_gem_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres)
{
	struct drm_i915_gem_object *obj = to_intel_bo(vm_obj->handle);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long page_offset;
	vm_page_t m, oldm = NULL;
	int ret = 0;
	int didpip = 0;
	bool write = !!(prot & VM_PROT_WRITE);

	intel_runtime_pm_get(dev_priv);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = (unsigned long)offset;

retry:
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	/* Try to flush the object off the GPU first without holding the lock.
	 * Upon reacquiring the lock, we will perform our sanity checks and then
	 * repeat the flush holding the lock in the normal manner to catch cases
	 * where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	/* Access to snoopable pages through the GTT is incoherent. */
	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
		ret = -EFAULT;
		goto unlock;
	}

	/*
	 * START FREEBSD MAGIC
	 *
	 * Add a pip count to avoid destruction and certain other
	 * complex operations (such as collapses?) while unlocked.
	 */
	if (didpip == 0) {
		vm_object_pip_add(vm_obj, 1);
		didpip = 1;
	}

	/*
	 * XXX We must currently remove the placeholder page now to avoid
	 * a deadlock against a concurrent i915_gem_release_mmap().
	 * Otherwise concurrent operation will block on the busy page
	 * while holding locks which we need to obtain.
	 */
	if (*mres != NULL) {
		oldm = *mres;
		vm_page_remove(oldm);
		*mres = NULL;
	} else {
		oldm = NULL;
	}

	VM_OBJECT_UNLOCK(vm_obj);
	ret = 0;
	m = NULL;

	/*
	 * Since the object lock was dropped, another thread might have
	 * faulted on the same GTT address and instantiated the mapping.
	 * Recheck.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
	if (m != NULL) {
		/*
		 * Try to busy the page, retry on failure (non-zero ret).
		 */
		if (vm_page_busy_try(m, false)) {
			kprintf("i915_gem_fault: PG_BUSY\n");
			VM_OBJECT_UNLOCK(vm_obj);
			mutex_unlock(&dev->struct_mutex);
			int dummy;
			tsleep(&dummy, 0, "delay", 1); /* XXX */
			VM_OBJECT_LOCK(vm_obj);
			goto retry;
		}
		goto have_page;
	}
	/*
	 * END FREEBSD MAGIC
	 */

	/*
	 * Object must be unlocked here to avoid deadlock during
	 * other GEM calls. All goto targets expect the object to
	 * be locked.
	 */
	VM_OBJECT_UNLOCK(vm_obj);

	/* Now bind it into the GTT if needed */
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unlock;
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	ret = i915_gem_object_get_fence(obj);
	if (ret) {
		VM_OBJECT_LOCK(vm_obj);
		goto unpin;
	}

	obj->fault_mappable = true;

	/*
	 * Relock object for insertion, leave locked for return.
	 */
	VM_OBJECT_LOCK(vm_obj);
	m = vm_phys_fictitious_to_vm_page(dev->agp->base +
					  i915_gem_obj_ggtt_offset(obj) +
					  offset);
	if (m == NULL) {
		ret = -EFAULT;
		goto unpin;
	}
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", m));
	KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m));

	/*
	 * Try to busy the page. Fails on non-zero return.
	 */
	if (vm_page_busy_try(m, false)) {
		VM_OBJECT_UNLOCK(vm_obj);
		/* Drop the pin once before sleeping; the retry path re-pins the object. */
		i915_gem_object_ggtt_unpin(obj);
		kprintf("i915_gem_fault: PG_BUSY(2)\n");
		mutex_unlock(&dev->struct_mutex);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	}
	m->valid = VM_PAGE_BITS_ALL;

	/*
	 * Finally, remap it using the new GTT offset.
	 *
	 * (object expected to be in a locked state)
	 */
	vm_page_insert(m, vm_obj, OFF_TO_IDX(offset));
have_page:
	*mres = m;

	i915_gem_object_ggtt_unpin(obj);
	mutex_unlock(&dev->struct_mutex);
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);
	return (VM_PAGER_OK);

	/*
	 * ALTERNATIVE ERROR RETURN.
	 *
	 * OBJECT EXPECTED TO BE LOCKED.
	 */
unpin:
	i915_gem_object_ggtt_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (i915_terminally_wedged(&dev_priv->gpu_error)) {
			// ret = VM_FAULT_SIGBUS;
			break;
		}
		/* fall through */
	case -EAGAIN:
		/*
		 * EAGAIN means the gpu is hung and we'll wait for the error
		 * handler to reset everything when re-faulting in
		 * i915_mutex_lock_interruptible.
		 */
		/* fall through */
	case -ERESTARTSYS:
	case -EINTR:
		kprintf("i915_gem_fault: %d\n", ret);
		VM_OBJECT_UNLOCK(vm_obj);
		int dummy;
		tsleep(&dummy, 0, "delay", 1); /* XXX */
		VM_OBJECT_LOCK(vm_obj);
		goto retry;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		ret = VM_PAGER_ERROR;
		break;
	}

	intel_runtime_pm_put(dev_priv);

	/*
	 * Error return. We already NULL'd out *mres so we should be able
	 * to free (oldm) here even though we are returning an error and the
	 * caller usually handles the freeing.
	 */
	if (oldm != NULL)
		vm_page_free(oldm);
	if (didpip)
		vm_object_pip_wakeup(vm_obj);

	return ret;
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
1815 */ 1816 void 1817 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1818 { 1819 vm_object_t devobj; 1820 vm_page_t m; 1821 int i, page_count; 1822 1823 if (!obj->fault_mappable) 1824 return; 1825 1826 devobj = cdev_pager_lookup(obj); 1827 if (devobj != NULL) { 1828 page_count = OFF_TO_IDX(obj->base.size); 1829 1830 VM_OBJECT_LOCK(devobj); 1831 for (i = 0; i < page_count; i++) { 1832 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 1833 if (m == NULL) 1834 continue; 1835 cdev_pager_free_page(devobj, m); 1836 } 1837 VM_OBJECT_UNLOCK(devobj); 1838 vm_object_deallocate(devobj); 1839 } 1840 1841 obj->fault_mappable = false; 1842 } 1843 1844 void 1845 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 1846 { 1847 struct drm_i915_gem_object *obj; 1848 1849 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 1850 i915_gem_release_mmap(obj); 1851 } 1852 1853 uint32_t 1854 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1855 { 1856 uint32_t gtt_size; 1857 1858 if (INTEL_INFO(dev)->gen >= 4 || 1859 tiling_mode == I915_TILING_NONE) 1860 return size; 1861 1862 /* Previous chips need a power-of-two fence region when tiling */ 1863 if (INTEL_INFO(dev)->gen == 3) 1864 gtt_size = 1024*1024; 1865 else 1866 gtt_size = 512*1024; 1867 1868 while (gtt_size < size) 1869 gtt_size <<= 1; 1870 1871 return gtt_size; 1872 } 1873 1874 /** 1875 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1876 * @obj: object to check 1877 * 1878 * Return the required GTT alignment for an object, taking into account 1879 * potential fence register mapping. 1880 */ 1881 uint32_t 1882 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 1883 int tiling_mode, bool fenced) 1884 { 1885 /* 1886 * Minimum alignment is 4k (GTT page size), but might be greater 1887 * if a fence register is needed for the object. 1888 */ 1889 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 1890 tiling_mode == I915_TILING_NONE) 1891 return 4096; 1892 1893 /* 1894 * Previous chips need to be aligned to the size of the smallest 1895 * fence register that can contain the object. 1896 */ 1897 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1898 } 1899 1900 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1901 { 1902 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1903 int ret; 1904 1905 #if 0 1906 if (drm_vma_node_has_offset(&obj->base.vma_node)) 1907 return 0; 1908 #endif 1909 1910 dev_priv->mm.shrinker_no_lock_stealing = true; 1911 1912 ret = drm_gem_create_mmap_offset(&obj->base); 1913 if (ret != -ENOSPC) 1914 goto out; 1915 1916 /* Badly fragmented mmap space? The only way we can recover 1917 * space is by destroying unwanted objects. We can't randomly release 1918 * mmap_offsets as userspace expects them to be persistent for the 1919 * lifetime of the objects. The closest we can is to release the 1920 * offsets on purgeable objects by truncating it and marking it purged, 1921 * which prevents userspace from ever using that object again. 
1922 */ 1923 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1924 ret = drm_gem_create_mmap_offset(&obj->base); 1925 if (ret != -ENOSPC) 1926 goto out; 1927 1928 i915_gem_shrink_all(dev_priv); 1929 ret = drm_gem_create_mmap_offset(&obj->base); 1930 out: 1931 dev_priv->mm.shrinker_no_lock_stealing = false; 1932 1933 return ret; 1934 } 1935 1936 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1937 { 1938 drm_gem_free_mmap_offset(&obj->base); 1939 } 1940 1941 int 1942 i915_gem_mmap_gtt(struct drm_file *file, 1943 struct drm_device *dev, 1944 uint32_t handle, 1945 uint64_t *offset) 1946 { 1947 struct drm_i915_private *dev_priv = dev->dev_private; 1948 struct drm_i915_gem_object *obj; 1949 int ret; 1950 1951 ret = i915_mutex_lock_interruptible(dev); 1952 if (ret) 1953 return ret; 1954 1955 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1956 if (&obj->base == NULL) { 1957 ret = -ENOENT; 1958 goto unlock; 1959 } 1960 1961 if (obj->base.size > dev_priv->gtt.mappable_end) { 1962 ret = -E2BIG; 1963 goto out; 1964 } 1965 1966 if (obj->madv != I915_MADV_WILLNEED) { 1967 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 1968 ret = -EFAULT; 1969 goto out; 1970 } 1971 1972 ret = i915_gem_object_create_mmap_offset(obj); 1973 if (ret) 1974 goto out; 1975 1976 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 1977 DRM_GEM_MAPPING_KEY; 1978 1979 out: 1980 drm_gem_object_unreference(&obj->base); 1981 unlock: 1982 mutex_unlock(&dev->struct_mutex); 1983 return ret; 1984 } 1985 1986 /** 1987 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1988 * @dev: DRM device 1989 * @data: GTT mapping ioctl data 1990 * @file: GEM object info 1991 * 1992 * Simply returns the fake offset to userspace so it can mmap it. 1993 * The mmap call will end up in drm_gem_mmap(), which will set things 1994 * up so we can get faults in the handler above. 1995 * 1996 * The fault handler will take care of binding the object into the GTT 1997 * (since it may have been evicted to make room for something), allocating 1998 * a fence register, and mapping the appropriate aperture address into 1999 * userspace. 
2000 */ 2001 int 2002 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2003 struct drm_file *file) 2004 { 2005 struct drm_i915_gem_mmap_gtt *args = data; 2006 2007 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2008 } 2009 2010 static inline int 2011 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2012 { 2013 return obj->madv == I915_MADV_DONTNEED; 2014 } 2015 2016 /* Immediately discard the backing storage */ 2017 static void 2018 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2019 { 2020 vm_object_t vm_obj; 2021 2022 vm_obj = obj->base.vm_obj; 2023 VM_OBJECT_LOCK(vm_obj); 2024 vm_object_page_remove(vm_obj, 0, 0, false); 2025 VM_OBJECT_UNLOCK(vm_obj); 2026 2027 obj->madv = __I915_MADV_PURGED; 2028 } 2029 2030 /* Try to discard unwanted pages */ 2031 static void 2032 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2033 { 2034 #if 0 2035 struct address_space *mapping; 2036 #endif 2037 2038 switch (obj->madv) { 2039 case I915_MADV_DONTNEED: 2040 i915_gem_object_truncate(obj); 2041 case __I915_MADV_PURGED: 2042 return; 2043 } 2044 2045 #if 0 2046 if (obj->base.filp == NULL) 2047 return; 2048 2049 mapping = file_inode(obj->base.filp)->i_mapping, 2050 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2051 #endif 2052 } 2053 2054 static void 2055 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2056 { 2057 int page_count = obj->base.size / PAGE_SIZE; 2058 int i, ret; 2059 2060 if (!obj->pages) 2061 return; 2062 2063 BUG_ON(obj->madv == __I915_MADV_PURGED); 2064 2065 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2066 if (ret) { 2067 /* In the event of a disaster, abandon all caches and 2068 * hope for the best. 2069 */ 2070 WARN_ON(ret != -EIO); 2071 i915_gem_clflush_object(obj, true); 2072 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2073 } 2074 2075 if (i915_gem_object_needs_bit17_swizzle(obj)) 2076 i915_gem_object_save_bit_17_swizzle(obj); 2077 2078 if (obj->madv == I915_MADV_DONTNEED) 2079 obj->dirty = 0; 2080 2081 for (i = 0; i < page_count; i++) { 2082 struct vm_page *page = obj->pages[i]; 2083 2084 if (obj->dirty) 2085 set_page_dirty(page); 2086 2087 if (obj->madv == I915_MADV_WILLNEED) 2088 mark_page_accessed(page); 2089 2090 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 2091 vm_page_unwire(obj->pages[i], 1); 2092 vm_page_wakeup(obj->pages[i]); 2093 } 2094 obj->dirty = 0; 2095 2096 kfree(obj->pages); 2097 obj->pages = NULL; 2098 } 2099 2100 int 2101 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2102 { 2103 const struct drm_i915_gem_object_ops *ops = obj->ops; 2104 2105 if (obj->pages == NULL) 2106 return 0; 2107 2108 if (obj->pages_pin_count) 2109 return -EBUSY; 2110 2111 BUG_ON(i915_gem_obj_bound_any(obj)); 2112 2113 /* ->put_pages might need to allocate memory for the bit17 swizzle 2114 * array, hence protect them from being reaped by removing them from gtt 2115 * lists early. 
*/ 2116 list_del(&obj->global_list); 2117 2118 ops->put_pages(obj); 2119 obj->pages = NULL; 2120 2121 i915_gem_object_invalidate(obj); 2122 2123 return 0; 2124 } 2125 2126 static unsigned long 2127 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 2128 bool purgeable_only) 2129 { 2130 struct list_head still_in_list; 2131 struct drm_i915_gem_object *obj; 2132 unsigned long count = 0; 2133 2134 /* 2135 * As we may completely rewrite the (un)bound list whilst unbinding 2136 * (due to retiring requests) we have to strictly process only 2137 * one element of the list at the time, and recheck the list 2138 * on every iteration. 2139 * 2140 * In particular, we must hold a reference whilst removing the 2141 * object as we may end up waiting for and/or retiring the objects. 2142 * This might release the final reference (held by the active list) 2143 * and result in the object being freed from under us. This is 2144 * similar to the precautions the eviction code must take whilst 2145 * removing objects. 2146 * 2147 * Also note that although these lists do not hold a reference to 2148 * the object we can safely grab one here: The final object 2149 * unreferencing and the bound_list are both protected by the 2150 * dev->struct_mutex and so we won't ever be able to observe an 2151 * object on the bound_list with a reference count equals 0. 2152 */ 2153 INIT_LIST_HEAD(&still_in_list); 2154 while (count < target && !list_empty(&dev_priv->mm.unbound_list)) { 2155 obj = list_first_entry(&dev_priv->mm.unbound_list, 2156 typeof(*obj), global_list); 2157 list_move_tail(&obj->global_list, &still_in_list); 2158 2159 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2160 continue; 2161 2162 drm_gem_object_reference(&obj->base); 2163 2164 if (i915_gem_object_put_pages(obj) == 0) 2165 count += obj->base.size >> PAGE_SHIFT; 2166 2167 drm_gem_object_unreference(&obj->base); 2168 } 2169 list_splice(&still_in_list, &dev_priv->mm.unbound_list); 2170 2171 INIT_LIST_HEAD(&still_in_list); 2172 while (count < target && !list_empty(&dev_priv->mm.bound_list)) { 2173 struct i915_vma *vma, *v; 2174 2175 obj = list_first_entry(&dev_priv->mm.bound_list, 2176 typeof(*obj), global_list); 2177 list_move_tail(&obj->global_list, &still_in_list); 2178 2179 if (!i915_gem_object_is_purgeable(obj) && purgeable_only) 2180 continue; 2181 2182 drm_gem_object_reference(&obj->base); 2183 2184 list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) 2185 if (i915_vma_unbind(vma)) 2186 break; 2187 2188 if (i915_gem_object_put_pages(obj) == 0) 2189 count += obj->base.size >> PAGE_SHIFT; 2190 2191 drm_gem_object_unreference(&obj->base); 2192 } 2193 list_splice(&still_in_list, &dev_priv->mm.bound_list); 2194 2195 return count; 2196 } 2197 2198 static unsigned long 2199 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2200 { 2201 return __i915_gem_shrink(dev_priv, target, true); 2202 } 2203 2204 static unsigned long 2205 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2206 { 2207 i915_gem_evict_everything(dev_priv->dev); 2208 return __i915_gem_shrink(dev_priv, LONG_MAX, false); 2209 } 2210 2211 static int 2212 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2213 { 2214 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2215 int page_count, i, j; 2216 vm_object_t vm_obj; 2217 struct vm_page *page; 2218 2219 /* Assert that the object is not currently in any GPU domain. 
As it 2220 * wasn't in the GTT, there shouldn't be any way it could have been in 2221 * a GPU cache 2222 */ 2223 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2224 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2225 2226 page_count = obj->base.size / PAGE_SIZE; 2227 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 2228 M_WAITOK); 2229 2230 /* Get the list of pages out of our struct file. They'll be pinned 2231 * at this point until we release them. 2232 * 2233 * Fail silently without starting the shrinker 2234 */ 2235 vm_obj = obj->base.vm_obj; 2236 VM_OBJECT_LOCK(vm_obj); 2237 for (i = 0; i < page_count; i++) { 2238 page = shmem_read_mapping_page(vm_obj, i); 2239 if (IS_ERR(page)) { 2240 i915_gem_purge(dev_priv, page_count); 2241 page = shmem_read_mapping_page(vm_obj, i); 2242 } 2243 if (IS_ERR(page)) { 2244 /* We've tried hard to allocate the memory by reaping 2245 * our own buffer, now let the real VM do its job and 2246 * go down in flames if truly OOM. 2247 */ 2248 2249 i915_gem_shrink_all(dev_priv); 2250 page = shmem_read_mapping_page(vm_obj, i); 2251 if (IS_ERR(page)) 2252 goto err_pages; 2253 } 2254 #ifdef CONFIG_SWIOTLB 2255 if (swiotlb_nr_tbl()) { 2256 st->nents++; 2257 sg_set_page(sg, page, PAGE_SIZE, 0); 2258 sg = sg_next(sg); 2259 continue; 2260 } 2261 #endif 2262 obj->pages[i] = page; 2263 } 2264 #ifdef CONFIG_SWIOTLB 2265 if (!swiotlb_nr_tbl()) 2266 #endif 2267 VM_OBJECT_UNLOCK(vm_obj); 2268 2269 if (i915_gem_object_needs_bit17_swizzle(obj)) 2270 i915_gem_object_do_bit_17_swizzle(obj); 2271 2272 return 0; 2273 2274 err_pages: 2275 for (j = 0; j < i; j++) { 2276 page = obj->pages[j]; 2277 vm_page_busy_wait(page, FALSE, "i915gem"); 2278 vm_page_unwire(page, 0); 2279 vm_page_wakeup(page); 2280 } 2281 VM_OBJECT_UNLOCK(vm_obj); 2282 kfree(obj->pages); 2283 obj->pages = NULL; 2284 return (-EIO); 2285 } 2286 2287 /* Ensure that the associated pages are gathered from the backing storage 2288 * and pinned into our object. i915_gem_object_get_pages() may be called 2289 * multiple times before they are released by a single call to 2290 * i915_gem_object_put_pages() - once the pages are no longer referenced 2291 * either as a result of memory pressure (reaping pages under the shrinker) 2292 * or as the object is itself released. 2293 */ 2294 int 2295 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2296 { 2297 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2298 const struct drm_i915_gem_object_ops *ops = obj->ops; 2299 int ret; 2300 2301 if (obj->pages) 2302 return 0; 2303 2304 if (obj->madv != I915_MADV_WILLNEED) { 2305 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2306 return -EFAULT; 2307 } 2308 2309 BUG_ON(obj->pages_pin_count); 2310 2311 ret = ops->get_pages(obj); 2312 if (ret) 2313 return ret; 2314 2315 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2316 return 0; 2317 } 2318 2319 static void 2320 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2321 struct intel_engine_cs *ring) 2322 { 2323 struct drm_device *dev = obj->base.dev; 2324 struct drm_i915_private *dev_priv = dev->dev_private; 2325 u32 seqno = intel_ring_get_seqno(ring); 2326 2327 BUG_ON(ring == NULL); 2328 if (obj->ring != ring && obj->last_write_seqno) { 2329 /* Keep the seqno relative to the current ring */ 2330 obj->last_write_seqno = seqno; 2331 } 2332 obj->ring = ring; 2333 2334 /* Add a reference if we're newly entering the active list. 
*/ 2335 if (!obj->active) { 2336 drm_gem_object_reference(&obj->base); 2337 obj->active = 1; 2338 } 2339 2340 list_move_tail(&obj->ring_list, &ring->active_list); 2341 2342 obj->last_read_seqno = seqno; 2343 2344 if (obj->fenced_gpu_access) { 2345 obj->last_fenced_seqno = seqno; 2346 2347 /* Bump MRU to take account of the delayed flush */ 2348 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2349 struct drm_i915_fence_reg *reg; 2350 2351 reg = &dev_priv->fence_regs[obj->fence_reg]; 2352 list_move_tail(®->lru_list, 2353 &dev_priv->mm.fence_list); 2354 } 2355 } 2356 } 2357 2358 void i915_vma_move_to_active(struct i915_vma *vma, 2359 struct intel_engine_cs *ring) 2360 { 2361 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2362 return i915_gem_object_move_to_active(vma->obj, ring); 2363 } 2364 2365 static void 2366 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2367 { 2368 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2369 struct i915_address_space *vm; 2370 struct i915_vma *vma; 2371 2372 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2373 BUG_ON(!obj->active); 2374 2375 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 2376 vma = i915_gem_obj_to_vma(obj, vm); 2377 if (vma && !list_empty(&vma->mm_list)) 2378 list_move_tail(&vma->mm_list, &vm->inactive_list); 2379 } 2380 2381 list_del_init(&obj->ring_list); 2382 obj->ring = NULL; 2383 2384 obj->last_read_seqno = 0; 2385 obj->last_write_seqno = 0; 2386 obj->base.write_domain = 0; 2387 2388 obj->last_fenced_seqno = 0; 2389 obj->fenced_gpu_access = false; 2390 2391 obj->active = 0; 2392 drm_gem_object_unreference(&obj->base); 2393 2394 WARN_ON(i915_verify_lists(dev)); 2395 } 2396 2397 static void 2398 i915_gem_object_retire(struct drm_i915_gem_object *obj) 2399 { 2400 struct intel_engine_cs *ring = obj->ring; 2401 2402 if (ring == NULL) 2403 return; 2404 2405 if (i915_seqno_passed(ring->get_seqno(ring, true), 2406 obj->last_read_seqno)) 2407 i915_gem_object_move_to_inactive(obj); 2408 } 2409 2410 static int 2411 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 2412 { 2413 struct drm_i915_private *dev_priv = dev->dev_private; 2414 struct intel_engine_cs *ring; 2415 int ret, i, j; 2416 2417 /* Carefully retire all requests without writing to the rings */ 2418 for_each_ring(ring, dev_priv, i) { 2419 ret = intel_ring_idle(ring); 2420 if (ret) 2421 return ret; 2422 } 2423 i915_gem_retire_requests(dev); 2424 2425 /* Finally reset hw state */ 2426 for_each_ring(ring, dev_priv, i) { 2427 intel_ring_init_seqno(ring, seqno); 2428 2429 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 2430 ring->semaphore.sync_seqno[j] = 0; 2431 } 2432 2433 return 0; 2434 } 2435 2436 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 2437 { 2438 struct drm_i915_private *dev_priv = dev->dev_private; 2439 int ret; 2440 2441 if (seqno == 0) 2442 return -EINVAL; 2443 2444 /* HWS page needs to be set less than what we 2445 * will inject to ring 2446 */ 2447 ret = i915_gem_init_seqno(dev, seqno - 1); 2448 if (ret) 2449 return ret; 2450 2451 /* Carefully set the last_seqno value so that wrap 2452 * detection still works 2453 */ 2454 dev_priv->next_seqno = seqno; 2455 dev_priv->last_seqno = seqno - 1; 2456 if (dev_priv->last_seqno == 0) 2457 dev_priv->last_seqno--; 2458 2459 return 0; 2460 } 2461 2462 int 2463 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2464 { 2465 struct drm_i915_private *dev_priv = dev->dev_private; 2466 2467 /* reserve 0 for non-seqno */ 2468 if (dev_priv->next_seqno == 
0) { 2469 int ret = i915_gem_init_seqno(dev, 0); 2470 if (ret) 2471 return ret; 2472 2473 dev_priv->next_seqno = 1; 2474 } 2475 2476 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 2477 return 0; 2478 } 2479 2480 int __i915_add_request(struct intel_engine_cs *ring, 2481 struct drm_file *file, 2482 struct drm_i915_gem_object *obj, 2483 u32 *out_seqno) 2484 { 2485 struct drm_i915_private *dev_priv = ring->dev->dev_private; 2486 struct drm_i915_gem_request *request; 2487 u32 request_ring_position, request_start; 2488 int ret; 2489 2490 request_start = intel_ring_get_tail(ring); 2491 /* 2492 * Emit any outstanding flushes - execbuf can fail to emit the flush 2493 * after having emitted the batchbuffer command. Hence we need to fix 2494 * things up similar to emitting the lazy request. The difference here 2495 * is that the flush _must_ happen before the next request, no matter 2496 * what. 2497 */ 2498 ret = intel_ring_flush_all_caches(ring); 2499 if (ret) 2500 return ret; 2501 2502 request = ring->preallocated_lazy_request; 2503 if (WARN_ON(request == NULL)) 2504 return -ENOMEM; 2505 2506 /* Record the position of the start of the request so that 2507 * should we detect the updated seqno part-way through the 2508 * GPU processing the request, we never over-estimate the 2509 * position of the head. 2510 */ 2511 request_ring_position = intel_ring_get_tail(ring); 2512 2513 ret = ring->add_request(ring); 2514 if (ret) 2515 return ret; 2516 2517 request->seqno = intel_ring_get_seqno(ring); 2518 request->ring = ring; 2519 request->head = request_start; 2520 request->tail = request_ring_position; 2521 2522 /* Whilst this request exists, batch_obj will be on the 2523 * active_list, and so will hold the active reference. Only when this 2524 * request is retired will the batch_obj be moved onto the 2525 * inactive_list and lose its active reference. Hence we do not need 2526 * to explicitly hold another reference here. 2527 */ 2528 request->batch_obj = obj; 2529 2530 /* Hold a reference to the current context so that we can inspect 2531 * it later in case a hangcheck error event fires.
2532 */ 2533 request->ctx = ring->last_context; 2534 if (request->ctx) 2535 i915_gem_context_reference(request->ctx); 2536 2537 request->emitted_jiffies = jiffies; 2538 list_add_tail(&request->list, &ring->request_list); 2539 request->file_priv = NULL; 2540 2541 if (file) { 2542 struct drm_i915_file_private *file_priv = file->driver_priv; 2543 2544 spin_lock(&file_priv->mm.lock); 2545 request->file_priv = file_priv; 2546 list_add_tail(&request->client_list, 2547 &file_priv->mm.request_list); 2548 spin_unlock(&file_priv->mm.lock); 2549 } 2550 2551 trace_i915_gem_request_add(ring, request->seqno); 2552 ring->outstanding_lazy_seqno = 0; 2553 ring->preallocated_lazy_request = NULL; 2554 2555 if (!dev_priv->ums.mm_suspended) { 2556 i915_queue_hangcheck(ring->dev); 2557 2558 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 2559 queue_delayed_work(dev_priv->wq, 2560 &dev_priv->mm.retire_work, 2561 round_jiffies_up_relative(HZ)); 2562 intel_mark_busy(dev_priv->dev); 2563 } 2564 2565 if (out_seqno) 2566 *out_seqno = request->seqno; 2567 return 0; 2568 } 2569 2570 static inline void 2571 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2572 { 2573 struct drm_i915_file_private *file_priv = request->file_priv; 2574 2575 if (!file_priv) 2576 return; 2577 2578 spin_lock(&file_priv->mm.lock); 2579 list_del(&request->client_list); 2580 request->file_priv = NULL; 2581 spin_unlock(&file_priv->mm.lock); 2582 } 2583 2584 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 2585 const struct intel_context *ctx) 2586 { 2587 unsigned long elapsed; 2588 2589 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 2590 2591 if (ctx->hang_stats.banned) 2592 return true; 2593 2594 if (elapsed <= DRM_I915_CTX_BAN_PERIOD) { 2595 if (!i915_gem_context_is_default(ctx)) { 2596 DRM_DEBUG("context hanging too fast, banning!\n"); 2597 return true; 2598 } else if (i915_stop_ring_allow_ban(dev_priv)) { 2599 if (i915_stop_ring_allow_warn(dev_priv)) 2600 DRM_ERROR("gpu hanging too fast, banning!\n"); 2601 return true; 2602 } 2603 } 2604 2605 return false; 2606 } 2607 2608 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 2609 struct intel_context *ctx, 2610 const bool guilty) 2611 { 2612 struct i915_ctx_hang_stats *hs; 2613 2614 if (WARN_ON(!ctx)) 2615 return; 2616 2617 hs = &ctx->hang_stats; 2618 2619 if (guilty) { 2620 hs->banned = i915_context_is_banned(dev_priv, ctx); 2621 hs->batch_active++; 2622 hs->guilty_ts = get_seconds(); 2623 } else { 2624 hs->batch_pending++; 2625 } 2626 } 2627 2628 static void i915_gem_free_request(struct drm_i915_gem_request *request) 2629 { 2630 list_del(&request->list); 2631 i915_gem_request_remove_from_client(request); 2632 2633 if (request->ctx) 2634 i915_gem_context_unreference(request->ctx); 2635 2636 kfree(request); 2637 } 2638 2639 struct drm_i915_gem_request * 2640 i915_gem_find_active_request(struct intel_engine_cs *ring) 2641 { 2642 struct drm_i915_gem_request *request; 2643 u32 completed_seqno; 2644 2645 completed_seqno = ring->get_seqno(ring, false); 2646 2647 list_for_each_entry(request, &ring->request_list, list) { 2648 if (i915_seqno_passed(completed_seqno, request->seqno)) 2649 continue; 2650 2651 return request; 2652 } 2653 2654 return NULL; 2655 } 2656 2657 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 2658 struct intel_engine_cs *ring) 2659 { 2660 struct drm_i915_gem_request *request; 2661 bool ring_hung; 2662 2663 request = i915_gem_find_active_request(ring); 2664 2665 if (request == NULL) 
2666 return; 2667 2668 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 2669 2670 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 2671 2672 list_for_each_entry_continue(request, &ring->request_list, list) 2673 i915_set_reset_status(dev_priv, request->ctx, false); 2674 } 2675 2676 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 2677 struct intel_engine_cs *ring) 2678 { 2679 while (!list_empty(&ring->active_list)) { 2680 struct drm_i915_gem_object *obj; 2681 2682 obj = list_first_entry(&ring->active_list, 2683 struct drm_i915_gem_object, 2684 ring_list); 2685 2686 i915_gem_object_move_to_inactive(obj); 2687 } 2688 2689 /* 2690 * We must free the requests after all the corresponding objects have 2691 * been moved off active lists. Which is the same order as the normal 2692 * retire_requests function does. This is important if objects hold 2693 * implicit references on things like e.g. ppgtt address spaces through 2694 * the request. 2695 */ 2696 while (!list_empty(&ring->request_list)) { 2697 struct drm_i915_gem_request *request; 2698 2699 request = list_first_entry(&ring->request_list, 2700 struct drm_i915_gem_request, 2701 list); 2702 2703 i915_gem_free_request(request); 2704 } 2705 2706 /* These may not have been flushed before the reset, do so now */ 2707 kfree(ring->preallocated_lazy_request); 2708 ring->preallocated_lazy_request = NULL; 2709 ring->outstanding_lazy_seqno = 0; 2710 } 2711 2712 void i915_gem_restore_fences(struct drm_device *dev) 2713 { 2714 struct drm_i915_private *dev_priv = dev->dev_private; 2715 int i; 2716 2717 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2718 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2719 2720 /* 2721 * Commit delayed tiling changes if we have an object still 2722 * attached to the fence, otherwise just clear the fence. 2723 */ 2724 if (reg->obj) { 2725 i915_gem_object_update_fence(reg->obj, reg, 2726 reg->obj->tiling_mode); 2727 } else { 2728 i915_gem_write_fence(dev, i, NULL); 2729 } 2730 } 2731 } 2732 2733 void i915_gem_reset(struct drm_device *dev) 2734 { 2735 struct drm_i915_private *dev_priv = dev->dev_private; 2736 struct intel_engine_cs *ring; 2737 int i; 2738 2739 /* 2740 * Before we free the objects from the requests, we need to inspect 2741 * them for finding the guilty party. As the requests only borrow 2742 * their reference to the objects, the inspection must be done first. 2743 */ 2744 for_each_ring(ring, dev_priv, i) 2745 i915_gem_reset_ring_status(dev_priv, ring); 2746 2747 for_each_ring(ring, dev_priv, i) 2748 i915_gem_reset_ring_cleanup(dev_priv, ring); 2749 2750 i915_gem_context_reset(dev); 2751 2752 i915_gem_restore_fences(dev); 2753 } 2754 2755 /** 2756 * This function clears the request list as sequence numbers are passed. 2757 */ 2758 void 2759 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 2760 { 2761 uint32_t seqno; 2762 2763 if (list_empty(&ring->request_list)) 2764 return; 2765 2766 WARN_ON(i915_verify_lists(ring->dev)); 2767 2768 seqno = ring->get_seqno(ring, true); 2769 2770 /* Move any buffers on the active list that are no longer referenced 2771 * by the ringbuffer to the flushing/inactive lists as appropriate, 2772 * before we free the context associated with the requests.
2773 */ 2774 while (!list_empty(&ring->active_list)) { 2775 struct drm_i915_gem_object *obj; 2776 2777 obj = list_first_entry(&ring->active_list, 2778 struct drm_i915_gem_object, 2779 ring_list); 2780 2781 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2782 break; 2783 2784 i915_gem_object_move_to_inactive(obj); 2785 } 2786 2787 2788 while (!list_empty(&ring->request_list)) { 2789 struct drm_i915_gem_request *request; 2790 2791 request = list_first_entry(&ring->request_list, 2792 struct drm_i915_gem_request, 2793 list); 2794 2795 if (!i915_seqno_passed(seqno, request->seqno)) 2796 break; 2797 2798 trace_i915_gem_request_retire(ring, request->seqno); 2799 /* We know the GPU must have read the request to have 2800 * sent us the seqno + interrupt, so use the position 2801 * of tail of the request to update the last known position 2802 * of the GPU head. 2803 */ 2804 ring->buffer->last_retired_head = request->tail; 2805 2806 i915_gem_free_request(request); 2807 } 2808 2809 if (unlikely(ring->trace_irq_seqno && 2810 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2811 ring->irq_put(ring); 2812 ring->trace_irq_seqno = 0; 2813 } 2814 2815 WARN_ON(i915_verify_lists(ring->dev)); 2816 } 2817 2818 bool 2819 i915_gem_retire_requests(struct drm_device *dev) 2820 { 2821 struct drm_i915_private *dev_priv = dev->dev_private; 2822 struct intel_engine_cs *ring; 2823 bool idle = true; 2824 int i; 2825 2826 for_each_ring(ring, dev_priv, i) { 2827 i915_gem_retire_requests_ring(ring); 2828 idle &= list_empty(&ring->request_list); 2829 } 2830 2831 if (idle) 2832 mod_delayed_work(dev_priv->wq, 2833 &dev_priv->mm.idle_work, 2834 msecs_to_jiffies(100)); 2835 2836 return idle; 2837 } 2838 2839 static void 2840 i915_gem_retire_work_handler(struct work_struct *work) 2841 { 2842 struct drm_i915_private *dev_priv = 2843 container_of(work, typeof(*dev_priv), mm.retire_work.work); 2844 struct drm_device *dev = dev_priv->dev; 2845 bool idle; 2846 2847 /* Come back later if the device is busy... */ 2848 idle = false; 2849 if (mutex_trylock(&dev->struct_mutex)) { 2850 idle = i915_gem_retire_requests(dev); 2851 mutex_unlock(&dev->struct_mutex); 2852 } 2853 if (!idle) 2854 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2855 round_jiffies_up_relative(HZ)); 2856 } 2857 2858 static void 2859 i915_gem_idle_work_handler(struct work_struct *work) 2860 { 2861 struct drm_i915_private *dev_priv = 2862 container_of(work, typeof(*dev_priv), mm.idle_work.work); 2863 2864 intel_mark_idle(dev_priv->dev); 2865 } 2866 2867 /** 2868 * Ensures that an object will eventually get non-busy by flushing any required 2869 * write domains, emitting any outstanding lazy request and retiring and 2870 * completed requests. 2871 */ 2872 static int 2873 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2874 { 2875 int ret; 2876 2877 if (obj->active) { 2878 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2879 if (ret) 2880 return ret; 2881 2882 i915_gem_retire_requests_ring(obj->ring); 2883 } 2884 2885 return 0; 2886 } 2887 2888 /** 2889 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2890 * @DRM_IOCTL_ARGS: standard ioctl arguments 2891 * 2892 * Returns 0 if successful, else an error is returned with the remaining time in 2893 * the timeout parameter. 
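 *
 * A rough userspace sketch (illustrative only; "fd" is a DRM file
 * descriptor and "handle" a GEM handle) waits for up to one second:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000000LL,
 *	};
 *
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
 *
 * On return, wait.timeout_ns holds whatever is left of that budget.
 * Possible return values: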
2894 * -ETIME: object is still busy after timeout 2895 * -ERESTARTSYS: signal interrupted the wait 2896 * -ENOENT: object doesn't exist 2897 * Also possible, but rare: 2898 * -EAGAIN: GPU wedged 2899 * -ENOMEM: damn 2900 * -ENODEV: Internal IRQ fail 2901 * -E?: The add request failed 2902 * 2903 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2904 * non-zero timeout parameter the wait ioctl will wait for the given number of 2905 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2906 * without holding struct_mutex the object may become re-busied before this 2907 * function completes. A similar but shorter race condition exists in the busy 2908 * ioctl. 2909 */ 2910 int 2911 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2912 { 2913 struct drm_i915_private *dev_priv = dev->dev_private; 2914 struct drm_i915_gem_wait *args = data; 2915 struct drm_i915_gem_object *obj; 2916 struct intel_engine_cs *ring = NULL; 2917 struct timespec timeout_stack, *timeout = NULL; 2918 unsigned reset_counter; 2919 u32 seqno = 0; 2920 int ret = 0; 2921 2922 if (args->timeout_ns >= 0) { 2923 timeout_stack = ns_to_timespec(args->timeout_ns); 2924 timeout = &timeout_stack; 2925 } 2926 2927 ret = i915_mutex_lock_interruptible(dev); 2928 if (ret) 2929 return ret; 2930 2931 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2932 if (&obj->base == NULL) { 2933 mutex_unlock(&dev->struct_mutex); 2934 return -ENOENT; 2935 } 2936 2937 /* Need to make sure the object gets inactive eventually. */ 2938 ret = i915_gem_object_flush_active(obj); 2939 if (ret) 2940 goto out; 2941 2942 if (obj->active) { 2943 seqno = obj->last_read_seqno; 2944 ring = obj->ring; 2945 } 2946 2947 if (seqno == 0) 2948 goto out; 2949 2950 /* Do this after OLR check to make sure we make forward progress polling 2951 * on this IOCTL with a 0 timeout (like busy ioctl) 2952 */ 2953 if (!args->timeout_ns) { 2954 ret = -ETIMEDOUT; 2955 goto out; 2956 } 2957 2958 drm_gem_object_unreference(&obj->base); 2959 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 2960 mutex_unlock(&dev->struct_mutex); 2961 2962 ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv); 2963 if (timeout) 2964 args->timeout_ns = timespec_to_ns(timeout); 2965 return ret; 2966 2967 out: 2968 drm_gem_object_unreference(&obj->base); 2969 mutex_unlock(&dev->struct_mutex); 2970 return ret; 2971 } 2972 2973 /** 2974 * i915_gem_object_sync - sync an object to a ring. 2975 * 2976 * @obj: object which may be in use on another ring. 2977 * @to: ring we wish to use the object on. May be NULL. 2978 * 2979 * This code is meant to abstract object synchronization with the GPU. 2980 * Calling with NULL implies synchronizing the object with the CPU 2981 * rather than a particular GPU ring. 2982 * 2983 * Returns 0 if successful, else propagates up the lower layer error.
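 *
 * For example (sketch only): the display code below passes its target ring
 * as @to when preparing a pageflip, so that with semaphores enabled only a
 * ring-to-ring wait is emitted, whereas i915_gem_object_sync(obj, NULL)
 * falls back to i915_gem_object_wait_rendering() and blocks until the last
 * GPU read of the object has retired.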
2984 */ 2985 int 2986 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2987 struct intel_engine_cs *to) 2988 { 2989 struct intel_engine_cs *from = obj->ring; 2990 u32 seqno; 2991 int ret, idx; 2992 2993 if (from == NULL || to == from) 2994 return 0; 2995 2996 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2997 return i915_gem_object_wait_rendering(obj, false); 2998 2999 idx = intel_ring_sync_index(from, to); 3000 3001 seqno = obj->last_read_seqno; 3002 if (seqno <= from->semaphore.sync_seqno[idx]) 3003 return 0; 3004 3005 ret = i915_gem_check_olr(obj->ring, seqno); 3006 if (ret) 3007 return ret; 3008 3009 trace_i915_gem_ring_sync_to(from, to, seqno); 3010 ret = to->semaphore.sync_to(to, from, seqno); 3011 if (!ret) 3012 /* We use last_read_seqno because sync_to() 3013 * might have just caused seqno wrap under 3014 * the radar. 3015 */ 3016 from->semaphore.sync_seqno[idx] = obj->last_read_seqno; 3017 3018 return ret; 3019 } 3020 3021 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3022 { 3023 u32 old_write_domain, old_read_domains; 3024 3025 /* Force a pagefault for domain tracking on next user access */ 3026 i915_gem_release_mmap(obj); 3027 3028 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3029 return; 3030 3031 /* Wait for any direct GTT access to complete */ 3032 mb(); 3033 3034 old_read_domains = obj->base.read_domains; 3035 old_write_domain = obj->base.write_domain; 3036 3037 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3038 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3039 3040 trace_i915_gem_object_change_domain(obj, 3041 old_read_domains, 3042 old_write_domain); 3043 } 3044 3045 int i915_vma_unbind(struct i915_vma *vma) 3046 { 3047 struct drm_i915_gem_object *obj = vma->obj; 3048 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3049 int ret; 3050 3051 if (list_empty(&vma->vma_link)) 3052 return 0; 3053 3054 if (!drm_mm_node_allocated(&vma->node)) { 3055 i915_gem_vma_destroy(vma); 3056 return 0; 3057 } 3058 3059 if (vma->pin_count) 3060 return -EBUSY; 3061 3062 BUG_ON(obj->pages == NULL); 3063 3064 ret = i915_gem_object_finish_gpu(obj); 3065 if (ret) 3066 return ret; 3067 /* Continue on if we fail due to EIO, the GPU is hung so we 3068 * should be safe and we need to cleanup or else we might 3069 * cause memory corruption through use-after-free. 3070 */ 3071 3072 if (i915_is_ggtt(vma->vm)) { 3073 i915_gem_object_finish_gtt(obj); 3074 3075 /* release the fence reg _after_ flushing */ 3076 ret = i915_gem_object_put_fence(obj); 3077 if (ret) 3078 return ret; 3079 } 3080 3081 trace_i915_vma_unbind(vma); 3082 3083 vma->unbind_vma(vma); 3084 3085 i915_gem_gtt_finish_object(obj); 3086 3087 list_del_init(&vma->mm_list); 3088 /* Avoid an unnecessary call to unbind on rebind. */ 3089 if (i915_is_ggtt(vma->vm)) 3090 obj->map_and_fenceable = true; 3091 3092 drm_mm_remove_node(&vma->node); 3093 i915_gem_vma_destroy(vma); 3094 3095 /* Since the unbound list is global, only move to that list if 3096 * no more VMAs exist. */ 3097 if (list_empty(&obj->vma_list)) 3098 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3099 3100 /* And finally now the object is completely decoupled from this vma, 3101 * we can drop its hold on the backing storage and allow it to be 3102 * reaped by the shrinker. 
3103 */ 3104 i915_gem_object_unpin_pages(obj); 3105 3106 return 0; 3107 } 3108 3109 int i915_gpu_idle(struct drm_device *dev) 3110 { 3111 struct drm_i915_private *dev_priv = dev->dev_private; 3112 struct intel_engine_cs *ring; 3113 int ret, i; 3114 3115 /* Flush everything onto the inactive list. */ 3116 for_each_ring(ring, dev_priv, i) { 3117 ret = i915_switch_context(ring, ring->default_context); 3118 if (ret) 3119 return ret; 3120 3121 ret = intel_ring_idle(ring); 3122 if (ret) 3123 return ret; 3124 } 3125 3126 return 0; 3127 } 3128 3129 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3130 struct drm_i915_gem_object *obj) 3131 { 3132 struct drm_i915_private *dev_priv = dev->dev_private; 3133 int fence_reg; 3134 int fence_pitch_shift; 3135 3136 if (INTEL_INFO(dev)->gen >= 6) { 3137 fence_reg = FENCE_REG_SANDYBRIDGE_0; 3138 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 3139 } else { 3140 fence_reg = FENCE_REG_965_0; 3141 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 3142 } 3143 3144 fence_reg += reg * 8; 3145 3146 /* To w/a incoherency with non-atomic 64-bit register updates, 3147 * we split the 64-bit update into two 32-bit writes. In order 3148 * for a partial fence not to be evaluated between writes, we 3149 * precede the update with write to turn off the fence register, 3150 * and only enable the fence as the last step. 3151 * 3152 * For extra levels of paranoia, we make sure each step lands 3153 * before applying the next step. 3154 */ 3155 I915_WRITE(fence_reg, 0); 3156 POSTING_READ(fence_reg); 3157 3158 if (obj) { 3159 u32 size = i915_gem_obj_ggtt_size(obj); 3160 uint64_t val; 3161 3162 val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 3163 0xfffff000) << 32; 3164 val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; 3165 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 3166 if (obj->tiling_mode == I915_TILING_Y) 3167 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3168 val |= I965_FENCE_REG_VALID; 3169 3170 I915_WRITE(fence_reg + 4, val >> 32); 3171 POSTING_READ(fence_reg + 4); 3172 3173 I915_WRITE(fence_reg + 0, val); 3174 POSTING_READ(fence_reg); 3175 } else { 3176 I915_WRITE(fence_reg + 4, 0); 3177 POSTING_READ(fence_reg + 4); 3178 } 3179 } 3180 3181 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3182 struct drm_i915_gem_object *obj) 3183 { 3184 struct drm_i915_private *dev_priv = dev->dev_private; 3185 u32 val; 3186 3187 if (obj) { 3188 u32 size = i915_gem_obj_ggtt_size(obj); 3189 int pitch_val; 3190 int tile_width; 3191 3192 WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || 3193 (size & -size) != size || 3194 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3195 "object 0x%08lx [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3196 i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); 3197 3198 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3199 tile_width = 128; 3200 else 3201 tile_width = 512; 3202 3203 /* Note: pitch better be a power of two tile widths */ 3204 pitch_val = obj->stride / tile_width; 3205 pitch_val = ffs(pitch_val) - 1; 3206 3207 val = i915_gem_obj_ggtt_offset(obj); 3208 if (obj->tiling_mode == I915_TILING_Y) 3209 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3210 val |= I915_FENCE_SIZE_BITS(size); 3211 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3212 val |= I830_FENCE_REG_VALID; 3213 } else 3214 val = 0; 3215 3216 if (reg < 8) 3217 reg = FENCE_REG_830_0 + reg * 4; 3218 else 3219 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3220 3221 I915_WRITE(reg, val); 3222 POSTING_READ(reg); 3223 } 3224 3225 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3226 struct drm_i915_gem_object *obj) 3227 { 3228 struct drm_i915_private *dev_priv = dev->dev_private; 3229 uint32_t val; 3230 3231 if (obj) { 3232 u32 size = i915_gem_obj_ggtt_size(obj); 3233 uint32_t pitch_val; 3234 3235 WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || 3236 (size & -size) != size || 3237 (i915_gem_obj_ggtt_offset(obj) & (size - 1)), 3238 "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", 3239 i915_gem_obj_ggtt_offset(obj), size); 3240 3241 pitch_val = obj->stride / 128; 3242 pitch_val = ffs(pitch_val) - 1; 3243 3244 val = i915_gem_obj_ggtt_offset(obj); 3245 if (obj->tiling_mode == I915_TILING_Y) 3246 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3247 val |= I830_FENCE_SIZE_BITS(size); 3248 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3249 val |= I830_FENCE_REG_VALID; 3250 } else 3251 val = 0; 3252 3253 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3254 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3255 } 3256 3257 inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 3258 { 3259 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 3260 } 3261 3262 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3263 struct drm_i915_gem_object *obj) 3264 { 3265 struct drm_i915_private *dev_priv = dev->dev_private; 3266 3267 /* Ensure that all CPU reads are completed before installing a fence 3268 * and all writes before removing the fence. 3269 */ 3270 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 3271 mb(); 3272 3273 WARN(obj && (!obj->stride || !obj->tiling_mode), 3274 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 3275 obj->stride, obj->tiling_mode); 3276 3277 switch (INTEL_INFO(dev)->gen) { 3278 case 8: 3279 case 7: 3280 case 6: 3281 case 5: 3282 case 4: i965_write_fence_reg(dev, reg, obj); break; 3283 case 3: i915_write_fence_reg(dev, reg, obj); break; 3284 case 2: i830_write_fence_reg(dev, reg, obj); break; 3285 default: BUG(); 3286 } 3287 3288 /* And similarly be paranoid that no direct access to this region 3289 * is reordered to before the fence is installed. 
3290 */ 3291 if (i915_gem_object_needs_mb(obj)) 3292 mb(); 3293 } 3294 3295 static inline int fence_number(struct drm_i915_private *dev_priv, 3296 struct drm_i915_fence_reg *fence) 3297 { 3298 return fence - dev_priv->fence_regs; 3299 } 3300 3301 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3302 struct drm_i915_fence_reg *fence, 3303 bool enable) 3304 { 3305 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3306 int reg = fence_number(dev_priv, fence); 3307 3308 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3309 3310 if (enable) { 3311 obj->fence_reg = reg; 3312 fence->obj = obj; 3313 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3314 } else { 3315 obj->fence_reg = I915_FENCE_REG_NONE; 3316 fence->obj = NULL; 3317 list_del_init(&fence->lru_list); 3318 } 3319 obj->fence_dirty = false; 3320 } 3321 3322 static int 3323 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 3324 { 3325 if (obj->last_fenced_seqno) { 3326 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3327 if (ret) 3328 return ret; 3329 3330 obj->last_fenced_seqno = 0; 3331 } 3332 3333 obj->fenced_gpu_access = false; 3334 return 0; 3335 } 3336 3337 int 3338 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3339 { 3340 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3341 struct drm_i915_fence_reg *fence; 3342 int ret; 3343 3344 ret = i915_gem_object_wait_fence(obj); 3345 if (ret) 3346 return ret; 3347 3348 if (obj->fence_reg == I915_FENCE_REG_NONE) 3349 return 0; 3350 3351 fence = &dev_priv->fence_regs[obj->fence_reg]; 3352 3353 if (WARN_ON(fence->pin_count)) 3354 return -EBUSY; 3355 3356 i915_gem_object_fence_lost(obj); 3357 i915_gem_object_update_fence(obj, fence, false); 3358 3359 return 0; 3360 } 3361 3362 static struct drm_i915_fence_reg * 3363 i915_find_fence_reg(struct drm_device *dev) 3364 { 3365 struct drm_i915_private *dev_priv = dev->dev_private; 3366 struct drm_i915_fence_reg *reg, *avail; 3367 int i; 3368 3369 /* First try to find a free reg */ 3370 avail = NULL; 3371 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3372 reg = &dev_priv->fence_regs[i]; 3373 if (!reg->obj) 3374 return reg; 3375 3376 if (!reg->pin_count) 3377 avail = reg; 3378 } 3379 3380 if (avail == NULL) 3381 goto deadlock; 3382 3383 /* None available, try to steal one or wait for a user to finish */ 3384 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3385 if (reg->pin_count) 3386 continue; 3387 3388 return reg; 3389 } 3390 3391 deadlock: 3392 /* Wait for completion of pending flips which consume fences */ 3393 if (intel_has_pending_fb_unpin(dev)) 3394 return ERR_PTR(-EAGAIN); 3395 3396 return ERR_PTR(-EDEADLK); 3397 } 3398 3399 /** 3400 * i915_gem_object_get_fence - set up fencing for an object 3401 * @obj: object to map through a fence reg 3402 * 3403 * When mapping objects through the GTT, userspace wants to be able to write 3404 * to them without having to worry about swizzling if the object is tiled. 3405 * This function walks the fence regs looking for a free one for @obj, 3406 * stealing one if it can't find any. 3407 * 3408 * It then sets up the reg based on the object's properties: address, pitch 3409 * and tiling format. 3410 * 3411 * For an untiled surface, this removes any existing fence. 
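 *
 * A typical caller, sketched only (error handling trimmed to the essentials):
 *
 *	ret = i915_gem_object_get_fence(obj);
 *	if (ret)
 *		return ret;
 *
 * where -EAGAIN means all fences are pinned but a pending flip should soon
 * release one, and -EDEADLK means they are all pinned with nothing in flight
 * to free them; both simply ask the caller to back off and retry later.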
3412 */ 3413 int 3414 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3415 { 3416 struct drm_device *dev = obj->base.dev; 3417 struct drm_i915_private *dev_priv = dev->dev_private; 3418 bool enable = obj->tiling_mode != I915_TILING_NONE; 3419 struct drm_i915_fence_reg *reg; 3420 int ret; 3421 3422 /* Have we updated the tiling parameters upon the object and so 3423 * will need to serialise the write to the associated fence register? 3424 */ 3425 if (obj->fence_dirty) { 3426 ret = i915_gem_object_wait_fence(obj); 3427 if (ret) 3428 return ret; 3429 } 3430 3431 /* Just update our place in the LRU if our fence is getting reused. */ 3432 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3433 reg = &dev_priv->fence_regs[obj->fence_reg]; 3434 if (!obj->fence_dirty) { 3435 list_move_tail(®->lru_list, 3436 &dev_priv->mm.fence_list); 3437 return 0; 3438 } 3439 } else if (enable) { 3440 reg = i915_find_fence_reg(dev); 3441 if (IS_ERR(reg)) 3442 return PTR_ERR(reg); 3443 3444 if (reg->obj) { 3445 struct drm_i915_gem_object *old = reg->obj; 3446 3447 ret = i915_gem_object_wait_fence(old); 3448 if (ret) 3449 return ret; 3450 3451 i915_gem_object_fence_lost(old); 3452 } 3453 } else 3454 return 0; 3455 3456 i915_gem_object_update_fence(obj, reg, enable); 3457 3458 return 0; 3459 } 3460 3461 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3462 struct drm_mm_node *gtt_space, 3463 unsigned long cache_level) 3464 { 3465 struct drm_mm_node *other; 3466 3467 /* On non-LLC machines we have to be careful when putting differing 3468 * types of snoopable memory together to avoid the prefetcher 3469 * crossing memory domains and dying. 3470 */ 3471 if (HAS_LLC(dev)) 3472 return true; 3473 3474 if (!drm_mm_node_allocated(gtt_space)) 3475 return true; 3476 3477 if (list_empty(>t_space->node_list)) 3478 return true; 3479 3480 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3481 if (other->allocated && !other->hole_follows && other->color != cache_level) 3482 return false; 3483 3484 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3485 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3486 return false; 3487 3488 return true; 3489 } 3490 3491 static void i915_gem_verify_gtt(struct drm_device *dev) 3492 { 3493 #if WATCH_GTT 3494 struct drm_i915_private *dev_priv = dev->dev_private; 3495 struct drm_i915_gem_object *obj; 3496 int err = 0; 3497 3498 list_for_each_entry(obj, &dev_priv->mm.gtt_list, global_list) { 3499 if (obj->gtt_space == NULL) { 3500 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3501 err++; 3502 continue; 3503 } 3504 3505 if (obj->cache_level != obj->gtt_space->color) { 3506 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3507 i915_gem_obj_ggtt_offset(obj), 3508 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3509 obj->cache_level, 3510 obj->gtt_space->color); 3511 err++; 3512 continue; 3513 } 3514 3515 if (!i915_gem_valid_gtt_space(dev, 3516 obj->gtt_space, 3517 obj->cache_level)) { 3518 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3519 i915_gem_obj_ggtt_offset(obj), 3520 i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj), 3521 obj->cache_level); 3522 err++; 3523 continue; 3524 } 3525 } 3526 3527 WARN_ON(err); 3528 #endif 3529 } 3530 3531 /** 3532 * Finds free space in the GTT aperture and binds the object there. 
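 *
 * Reached via the object pin paths; as a rough sketch, a call such as
 *
 *	vma = i915_gem_object_bind_to_vm(obj, vm, 0, PIN_MAPPABLE);
 *
 * limits the search to the CPU-mappable aperture and rounds the size and
 * alignment up to what a fence register would need, while PIN_OFFSET_BIAS
 * in @flags lower-bounds the start of the search range.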
3533 */ 3534 static struct i915_vma * 3535 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3536 struct i915_address_space *vm, 3537 unsigned alignment, 3538 uint64_t flags) 3539 { 3540 struct drm_device *dev = obj->base.dev; 3541 struct drm_i915_private *dev_priv = dev->dev_private; 3542 u32 size, fence_size, fence_alignment, unfenced_alignment; 3543 unsigned long start = 3544 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 3545 unsigned long end = 3546 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3547 struct i915_vma *vma; 3548 int ret; 3549 3550 fence_size = i915_gem_get_gtt_size(dev, 3551 obj->base.size, 3552 obj->tiling_mode); 3553 fence_alignment = i915_gem_get_gtt_alignment(dev, 3554 obj->base.size, 3555 obj->tiling_mode, true); 3556 unfenced_alignment = 3557 i915_gem_get_gtt_alignment(dev, 3558 obj->base.size, 3559 obj->tiling_mode, false); 3560 3561 if (alignment == 0) 3562 alignment = flags & PIN_MAPPABLE ? fence_alignment : 3563 unfenced_alignment; 3564 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 3565 DRM_DEBUG("Invalid object alignment requested %u\n", alignment); 3566 return ERR_PTR(-EINVAL); 3567 } 3568 3569 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 3570 3571 /* If the object is bigger than the entire aperture, reject it early 3572 * before evicting everything in a vain attempt to find space. 3573 */ 3574 if (obj->base.size > end) { 3575 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", 3576 obj->base.size, 3577 flags & PIN_MAPPABLE ? "mappable" : "total", 3578 end); 3579 return ERR_PTR(-E2BIG); 3580 } 3581 3582 ret = i915_gem_object_get_pages(obj); 3583 if (ret) 3584 return ERR_PTR(ret); 3585 3586 i915_gem_object_pin_pages(obj); 3587 3588 vma = i915_gem_obj_lookup_or_create_vma(obj, vm); 3589 if (IS_ERR(vma)) 3590 goto err_unpin; 3591 3592 search_free: 3593 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3594 size, alignment, 3595 obj->cache_level, 3596 start, end, 3597 DRM_MM_SEARCH_DEFAULT, 3598 DRM_MM_CREATE_DEFAULT); 3599 if (ret) { 3600 ret = i915_gem_evict_something(dev, vm, size, alignment, 3601 obj->cache_level, 3602 start, end, 3603 flags); 3604 if (ret == 0) 3605 goto search_free; 3606 3607 goto err_free_vma; 3608 } 3609 if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node, 3610 obj->cache_level))) { 3611 ret = -EINVAL; 3612 goto err_remove_node; 3613 } 3614 3615 ret = i915_gem_gtt_prepare_object(obj); 3616 if (ret) 3617 goto err_remove_node; 3618 3619 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 3620 list_add_tail(&vma->mm_list, &vm->inactive_list); 3621 3622 if (i915_is_ggtt(vm)) { 3623 bool mappable, fenceable; 3624 3625 fenceable = (vma->node.size == fence_size && 3626 (vma->node.start & (fence_alignment - 1)) == 0); 3627 3628 mappable = (vma->node.start + obj->base.size <= 3629 dev_priv->gtt.mappable_end); 3630 3631 obj->map_and_fenceable = mappable && fenceable; 3632 } 3633 3634 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 3635 3636 trace_i915_vma_bind(vma, flags); 3637 vma->bind_vma(vma, obj->cache_level, 3638 flags & (PIN_MAPPABLE | PIN_GLOBAL) ? 
GLOBAL_BIND : 0); 3639 3640 i915_gem_verify_gtt(dev); 3641 return vma; 3642 3643 err_remove_node: 3644 drm_mm_remove_node(&vma->node); 3645 err_free_vma: 3646 i915_gem_vma_destroy(vma); 3647 vma = ERR_PTR(ret); 3648 err_unpin: 3649 i915_gem_object_unpin_pages(obj); 3650 return vma; 3651 } 3652 3653 bool 3654 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 3655 bool force) 3656 { 3657 /* If we don't have a page list set up, then we're not pinned 3658 * to GPU, and we can ignore the cache flush because it'll happen 3659 * again at bind time. 3660 */ 3661 if (obj->pages == NULL) 3662 return false; 3663 3664 /* 3665 * Stolen memory is always coherent with the GPU as it is explicitly 3666 * marked as wc by the system, or the system is cache-coherent. 3667 */ 3668 if (obj->stolen) 3669 return false; 3670 3671 /* If the GPU is snooping the contents of the CPU cache, 3672 * we do not need to manually clear the CPU cache lines. However, 3673 * the caches are only snooped when the render cache is 3674 * flushed/invalidated. As we always have to emit invalidations 3675 * and flushes when moving into and out of the RENDER domain, correct 3676 * snooping behaviour occurs naturally as the result of our domain 3677 * tracking. 3678 */ 3679 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) 3680 return false; 3681 3682 trace_i915_gem_object_clflush(obj); 3683 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 3684 3685 return true; 3686 } 3687 3688 /** Flushes the GTT write domain for the object if it's dirty. */ 3689 static void 3690 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3691 { 3692 uint32_t old_write_domain; 3693 3694 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3695 return; 3696 3697 /* No actual flushing is required for the GTT write domain. Writes 3698 * to it immediately go to main memory as far as we know, so there's 3699 * no chipset flush. It also doesn't land in render cache. 3700 * 3701 * However, we do have to enforce the order so that all writes through 3702 * the GTT land before any writes to the device, such as updates to 3703 * the GATT itself. 3704 */ 3705 wmb(); 3706 3707 old_write_domain = obj->base.write_domain; 3708 obj->base.write_domain = 0; 3709 3710 trace_i915_gem_object_change_domain(obj, 3711 obj->base.read_domains, 3712 old_write_domain); 3713 } 3714 3715 /** Flushes the CPU write domain for the object if it's dirty. */ 3716 static void 3717 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, 3718 bool force) 3719 { 3720 uint32_t old_write_domain; 3721 3722 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3723 return; 3724 3725 if (i915_gem_clflush_object(obj, force)) 3726 i915_gem_chipset_flush(obj->base.dev); 3727 3728 old_write_domain = obj->base.write_domain; 3729 obj->base.write_domain = 0; 3730 3731 trace_i915_gem_object_change_domain(obj, 3732 obj->base.read_domains, 3733 old_write_domain); 3734 } 3735 3736 /** 3737 * Moves a single object to the GTT read, and possibly write domain. 3738 * 3739 * This function returns when the move is complete, including waiting on 3740 * flushes to occur. 3741 */ 3742 int 3743 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3744 { 3745 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3746 uint32_t old_write_domain, old_read_domains; 3747 int ret; 3748 3749 /* Not valid to be called on unbound objects. 
*/ 3750 if (!i915_gem_obj_bound_any(obj)) 3751 return -EINVAL; 3752 3753 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3754 return 0; 3755 3756 ret = i915_gem_object_wait_rendering(obj, !write); 3757 if (ret) 3758 return ret; 3759 3760 i915_gem_object_retire(obj); 3761 i915_gem_object_flush_cpu_write_domain(obj, false); 3762 3763 /* Serialise direct access to this object with the barriers for 3764 * coherent writes from the GPU, by effectively invalidating the 3765 * GTT domain upon first access. 3766 */ 3767 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3768 mb(); 3769 3770 old_write_domain = obj->base.write_domain; 3771 old_read_domains = obj->base.read_domains; 3772 3773 /* It should now be out of any other write domains, and we can update 3774 * the domain values for our changes. 3775 */ 3776 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3777 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3778 if (write) { 3779 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3780 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3781 obj->dirty = 1; 3782 } 3783 3784 trace_i915_gem_object_change_domain(obj, 3785 old_read_domains, 3786 old_write_domain); 3787 3788 /* And bump the LRU for this access */ 3789 if (i915_gem_object_is_inactive(obj)) { 3790 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 3791 if (vma) 3792 list_move_tail(&vma->mm_list, 3793 &dev_priv->gtt.base.inactive_list); 3794 3795 } 3796 3797 return 0; 3798 } 3799 3800 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3801 enum i915_cache_level cache_level) 3802 { 3803 struct drm_device *dev = obj->base.dev; 3804 struct i915_vma *vma, *next; 3805 int ret; 3806 3807 if (obj->cache_level == cache_level) 3808 return 0; 3809 3810 if (i915_gem_obj_is_pinned(obj)) { 3811 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3812 return -EBUSY; 3813 } 3814 3815 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 3816 if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { 3817 ret = i915_vma_unbind(vma); 3818 if (ret) 3819 return ret; 3820 } 3821 } 3822 3823 if (i915_gem_obj_bound_any(obj)) { 3824 ret = i915_gem_object_finish_gpu(obj); 3825 if (ret) 3826 return ret; 3827 3828 i915_gem_object_finish_gtt(obj); 3829 3830 /* Before SandyBridge, you could not use tiling or fence 3831 * registers with snooped memory, so relinquish any fences 3832 * currently pointing to our region in the aperture. 3833 */ 3834 if (INTEL_INFO(dev)->gen < 6) { 3835 ret = i915_gem_object_put_fence(obj); 3836 if (ret) 3837 return ret; 3838 } 3839 3840 list_for_each_entry(vma, &obj->vma_list, vma_link) 3841 if (drm_mm_node_allocated(&vma->node)) 3842 vma->bind_vma(vma, cache_level, 3843 obj->has_global_gtt_mapping ? GLOBAL_BIND : 0); 3844 } 3845 3846 list_for_each_entry(vma, &obj->vma_list, vma_link) 3847 vma->node.color = cache_level; 3848 obj->cache_level = cache_level; 3849 3850 if (cpu_write_needs_clflush(obj)) { 3851 u32 old_read_domains, old_write_domain; 3852 3853 /* If we're coming from LLC cached, then we haven't 3854 * actually been tracking whether the data is in the 3855 * CPU cache or not, since we only allow one bit set 3856 * in obj->write_domain and have been skipping the clflushes. 3857 * Just set it to the CPU cache for now. 
3858 */ 3859 i915_gem_object_retire(obj); 3860 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3861 3862 old_read_domains = obj->base.read_domains; 3863 old_write_domain = obj->base.write_domain; 3864 3865 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3866 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3867 3868 trace_i915_gem_object_change_domain(obj, 3869 old_read_domains, 3870 old_write_domain); 3871 } 3872 3873 i915_gem_verify_gtt(dev); 3874 return 0; 3875 } 3876 3877 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3878 struct drm_file *file) 3879 { 3880 struct drm_i915_gem_caching *args = data; 3881 struct drm_i915_gem_object *obj; 3882 int ret; 3883 3884 ret = i915_mutex_lock_interruptible(dev); 3885 if (ret) 3886 return ret; 3887 3888 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3889 if (&obj->base == NULL) { 3890 ret = -ENOENT; 3891 goto unlock; 3892 } 3893 3894 switch (obj->cache_level) { 3895 case I915_CACHE_LLC: 3896 case I915_CACHE_L3_LLC: 3897 args->caching = I915_CACHING_CACHED; 3898 break; 3899 3900 case I915_CACHE_WT: 3901 args->caching = I915_CACHING_DISPLAY; 3902 break; 3903 3904 default: 3905 args->caching = I915_CACHING_NONE; 3906 break; 3907 } 3908 3909 drm_gem_object_unreference(&obj->base); 3910 unlock: 3911 mutex_unlock(&dev->struct_mutex); 3912 return ret; 3913 } 3914 3915 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3916 struct drm_file *file) 3917 { 3918 struct drm_i915_gem_caching *args = data; 3919 struct drm_i915_gem_object *obj; 3920 enum i915_cache_level level; 3921 int ret; 3922 3923 switch (args->caching) { 3924 case I915_CACHING_NONE: 3925 level = I915_CACHE_NONE; 3926 break; 3927 case I915_CACHING_CACHED: 3928 level = I915_CACHE_LLC; 3929 break; 3930 case I915_CACHING_DISPLAY: 3931 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 3932 break; 3933 default: 3934 return -EINVAL; 3935 } 3936 3937 ret = i915_mutex_lock_interruptible(dev); 3938 if (ret) 3939 return ret; 3940 3941 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3942 if (&obj->base == NULL) { 3943 ret = -ENOENT; 3944 goto unlock; 3945 } 3946 3947 ret = i915_gem_object_set_cache_level(obj, level); 3948 3949 drm_gem_object_unreference(&obj->base); 3950 unlock: 3951 mutex_unlock(&dev->struct_mutex); 3952 return ret; 3953 } 3954 3955 static bool is_pin_display(struct drm_i915_gem_object *obj) 3956 { 3957 struct i915_vma *vma; 3958 3959 if (list_empty(&obj->vma_list)) 3960 return false; 3961 3962 vma = i915_gem_obj_to_ggtt(obj); 3963 if (!vma) 3964 return false; 3965 3966 /* There are 3 sources that pin objects: 3967 * 1. The display engine (scanouts, sprites, cursors); 3968 * 2. Reservations for execbuffer; 3969 * 3. The user. 3970 * 3971 * We can ignore reservations as we hold the struct_mutex and 3972 * are only called outside of the reservation path. The user 3973 * can only increment pin_count once, and so if after 3974 * subtracting the potential reference by the user, any pin_count 3975 * remains, it must be due to another use by the display engine. 3976 */ 3977 return vma->pin_count - !!obj->user_pin_count; 3978 } 3979 3980 /* 3981 * Prepare buffer for display plane (scanout, cursors, etc). 3982 * Can be called from an uninterruptible phase (modesetting) and allows 3983 * any flushes to be pipelined (for pageflips). 
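 *
 * In rough terms the sequence below is: mark obj->pin_display, move the
 * object to a scanout-compatible cache level (write-through where HAS_WT(),
 * otherwise uncached), pin it into the mappable GTT and flush any pending
 * CPU writes. Each successful call is paired with
 * i915_gem_object_unpin_from_display_plane() once the scanout is torn down.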
 */
int
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     struct intel_engine_cs *pipelined)
{
	u32 old_read_domains, old_write_domain;
	bool was_pin_display;
	int ret;

	if (pipelined != obj->ring) {
		ret = i915_gem_object_sync(obj, pipelined);
		if (ret)
			return ret;
	}

	/* Mark the pin_display early so that we account for the
	 * display coherency whilst setting up the cache domains.
	 */
	was_pin_display = obj->pin_display;
	obj->pin_display = true;

	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		goto err_unpin_display;

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers.
	 */
	ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
	if (ret)
		goto err_unpin_display;

	i915_gem_object_flush_cpu_write_domain(obj, true);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->base.write_domain = 0;
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;

err_unpin_display:
	WARN_ON(was_pin_display != is_pin_display(obj));
	obj->pin_display = was_pin_display;
	return ret;
}

void
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
{
	i915_gem_object_ggtt_unpin(obj);
	obj->pin_display = is_pin_display(obj);
}

int
i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
{
	int ret;

	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	/* Ensure that we invalidate the GPU's caches and TLBs. */
	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	return 0;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return 0;

	ret = i915_gem_object_wait_rendering(obj, !write);
	if (ret)
		return ret;

	i915_gem_object_retire(obj);
	i915_gem_object_flush_gtt_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, false);

		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
4136 */ 4137 static int 4138 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4139 { 4140 struct drm_i915_private *dev_priv = dev->dev_private; 4141 struct drm_i915_file_private *file_priv = file->driver_priv; 4142 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 4143 struct drm_i915_gem_request *request; 4144 struct intel_engine_cs *ring = NULL; 4145 unsigned reset_counter; 4146 u32 seqno = 0; 4147 int ret; 4148 4149 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4150 if (ret) 4151 return ret; 4152 4153 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4154 if (ret) 4155 return ret; 4156 4157 spin_lock(&file_priv->mm.lock); 4158 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4159 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4160 break; 4161 4162 ring = request->ring; 4163 seqno = request->seqno; 4164 } 4165 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4166 spin_unlock(&file_priv->mm.lock); 4167 4168 if (seqno == 0) 4169 return 0; 4170 4171 ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL); 4172 if (ret == 0) 4173 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4174 4175 return ret; 4176 } 4177 4178 static bool 4179 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4180 { 4181 struct drm_i915_gem_object *obj = vma->obj; 4182 4183 if (alignment && 4184 vma->node.start & (alignment - 1)) 4185 return true; 4186 4187 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4188 return true; 4189 4190 if (flags & PIN_OFFSET_BIAS && 4191 vma->node.start < (flags & PIN_OFFSET_MASK)) 4192 return true; 4193 4194 return false; 4195 } 4196 4197 int 4198 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4199 struct i915_address_space *vm, 4200 uint32_t alignment, 4201 uint64_t flags) 4202 { 4203 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4204 struct i915_vma *vma; 4205 int ret; 4206 4207 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4208 return -ENODEV; 4209 4210 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4211 return -EINVAL; 4212 4213 vma = i915_gem_obj_to_vma(obj, vm); 4214 if (vma) { 4215 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4216 return -EBUSY; 4217 4218 if (i915_vma_misplaced(vma, alignment, flags)) { 4219 WARN(vma->pin_count, 4220 "bo is already pinned with incorrect alignment:" 4221 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 4222 " obj->map_and_fenceable=%d\n", 4223 i915_gem_obj_offset(obj, vm), alignment, 4224 !!(flags & PIN_MAPPABLE), 4225 obj->map_and_fenceable); 4226 ret = i915_vma_unbind(vma); 4227 if (ret) 4228 return ret; 4229 4230 vma = NULL; 4231 } 4232 } 4233 4234 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4235 vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags); 4236 if (IS_ERR(vma)) 4237 return PTR_ERR(vma); 4238 } 4239 4240 if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping) 4241 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 4242 4243 vma->pin_count++; 4244 if (flags & PIN_MAPPABLE) 4245 obj->pin_mappable |= true; 4246 4247 return 0; 4248 } 4249 4250 void 4251 i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) 4252 { 4253 struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); 4254 4255 BUG_ON(!vma); 4256 BUG_ON(vma->pin_count == 0); 4257 BUG_ON(!i915_gem_obj_ggtt_bound(obj)); 4258 4259 if (--vma->pin_count == 0) 4260 obj->pin_mappable = false; 4261 } 4262 4263 bool 4264 
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
{
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
		struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);

		WARN_ON(!ggtt_vma ||
			dev_priv->fence_regs[obj->fence_reg].pin_count >
			ggtt_vma->pin_count);
		dev_priv->fence_regs[obj->fence_reg].pin_count++;
		return true;
	} else
		return false;
}

void
i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
{
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
		dev_priv->fence_regs[obj->fence_reg].pin_count--;
	}
}

int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (INTEL_INFO(dev)->gen >= 6)
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to pin a purgeable buffer\n");
		ret = -EFAULT;
		goto out;
	}

	if (obj->pin_filp != NULL && obj->pin_filp != file) {
		DRM_DEBUG("Already pinned in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
		ret = -EINVAL;
		goto out;
	}

	if (obj->user_pin_count == ULONG_MAX) {
		ret = -EBUSY;
		goto out;
	}

	if (obj->user_pin_count == 0) {
		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
		if (ret)
			goto out;
	}

	obj->user_pin_count++;
	obj->pin_filp = file;

	args->offset = i915_gem_obj_ggtt_offset(obj);
out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->pin_filp != file) {
		DRM_DEBUG("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
			  args->handle);
		ret = -EINVAL;
		goto out;
	}
	obj->user_pin_count--;
	if (obj->user_pin_count == 0) {
		obj->pin_filp = NULL;
		i915_gem_object_ggtt_unpin(obj);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Count all active objects as busy, even if they are currently not used
	 *
by the gpu. Users of this interface expect objects to eventually 4402 * become non-busy without any further actions, therefore emit any 4403 * necessary flushes here. 4404 */ 4405 ret = i915_gem_object_flush_active(obj); 4406 4407 args->busy = obj->active; 4408 if (obj->ring) { 4409 args->busy |= intel_ring_flag(obj->ring) << 16; 4410 } 4411 4412 drm_gem_object_unreference(&obj->base); 4413 unlock: 4414 mutex_unlock(&dev->struct_mutex); 4415 return ret; 4416 } 4417 4418 int 4419 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4420 struct drm_file *file_priv) 4421 { 4422 return i915_gem_ring_throttle(dev, file_priv); 4423 } 4424 4425 int 4426 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4427 struct drm_file *file_priv) 4428 { 4429 struct drm_i915_gem_madvise *args = data; 4430 struct drm_i915_gem_object *obj; 4431 int ret; 4432 4433 switch (args->madv) { 4434 case I915_MADV_DONTNEED: 4435 case I915_MADV_WILLNEED: 4436 break; 4437 default: 4438 return -EINVAL; 4439 } 4440 4441 ret = i915_mutex_lock_interruptible(dev); 4442 if (ret) 4443 return ret; 4444 4445 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4446 if (&obj->base == NULL) { 4447 ret = -ENOENT; 4448 goto unlock; 4449 } 4450 4451 if (i915_gem_obj_is_pinned(obj)) { 4452 ret = -EINVAL; 4453 goto out; 4454 } 4455 4456 if (obj->madv != __I915_MADV_PURGED) 4457 obj->madv = args->madv; 4458 4459 /* if the object is no longer attached, discard its backing storage */ 4460 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4461 i915_gem_object_truncate(obj); 4462 4463 args->retained = obj->madv != __I915_MADV_PURGED; 4464 4465 out: 4466 drm_gem_object_unreference(&obj->base); 4467 unlock: 4468 mutex_unlock(&dev->struct_mutex); 4469 return ret; 4470 } 4471 4472 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4473 const struct drm_i915_gem_object_ops *ops) 4474 { 4475 INIT_LIST_HEAD(&obj->global_list); 4476 INIT_LIST_HEAD(&obj->ring_list); 4477 INIT_LIST_HEAD(&obj->obj_exec_link); 4478 INIT_LIST_HEAD(&obj->vma_list); 4479 4480 obj->ops = ops; 4481 4482 obj->fence_reg = I915_FENCE_REG_NONE; 4483 obj->madv = I915_MADV_WILLNEED; 4484 /* Avoid an unnecessary call to unbind on the first bind. */ 4485 obj->map_and_fenceable = true; 4486 4487 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4488 } 4489 4490 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4491 .get_pages = i915_gem_object_get_pages_gtt, 4492 .put_pages = i915_gem_object_put_pages_gtt, 4493 }; 4494 4495 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4496 size_t size) 4497 { 4498 struct drm_i915_gem_object *obj; 4499 #if 0 4500 struct address_space *mapping; 4501 gfp_t mask; 4502 #endif 4503 4504 obj = i915_gem_object_alloc(dev); 4505 if (obj == NULL) 4506 return NULL; 4507 4508 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4509 i915_gem_object_free(obj); 4510 return NULL; 4511 } 4512 4513 #if 0 4514 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4515 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4516 /* 965gm cannot relocate objects above 4GiB. 
*/ 4517 mask &= ~__GFP_HIGHMEM; 4518 mask |= __GFP_DMA32; 4519 } 4520 4521 mapping = file_inode(obj->base.filp)->i_mapping; 4522 mapping_set_gfp_mask(mapping, mask); 4523 #endif 4524 4525 i915_gem_object_init(obj, &i915_gem_object_ops); 4526 4527 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4528 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4529 4530 if (HAS_LLC(dev)) { 4531 /* On some devices, we can have the GPU use the LLC (the CPU 4532 * cache) for about a 10% performance improvement 4533 * compared to uncached. Graphics requests other than 4534 * display scanout are coherent with the CPU in 4535 * accessing this cache. This means in this mode we 4536 * don't need to clflush on the CPU side, and on the 4537 * GPU side we only need to flush internal caches to 4538 * get data visible to the CPU. 4539 * 4540 * However, we maintain the display planes as UC, and so 4541 * need to rebind when first used as such. 4542 */ 4543 obj->cache_level = I915_CACHE_LLC; 4544 } else 4545 obj->cache_level = I915_CACHE_NONE; 4546 4547 trace_i915_gem_object_create(obj); 4548 4549 return obj; 4550 } 4551 4552 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 4553 { 4554 /* If we are the last user of the backing storage (be it shmemfs 4555 * pages or stolen etc), we know that the pages are going to be 4556 * immediately released. In this case, we can then skip copying 4557 * back the contents from the GPU. 4558 */ 4559 4560 if (obj->madv != I915_MADV_WILLNEED) 4561 return false; 4562 4563 if (obj->base.vm_obj == NULL) 4564 return true; 4565 4566 /* At first glance, this looks racy, but then again so would be 4567 * userspace racing mmap against close. However, the first external 4568 * reference to the filp can only be obtained through the 4569 * i915_gem_mmap_ioctl() which safeguards us against the user 4570 * acquiring such a reference whilst we are in the middle of 4571 * freeing the object. 4572 */ 4573 #if 0 4574 return atomic_long_read(&obj->base.filp->f_count) == 1; 4575 #else 4576 return false; 4577 #endif 4578 } 4579 4580 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4581 { 4582 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4583 struct drm_device *dev = obj->base.dev; 4584 struct drm_i915_private *dev_priv = dev->dev_private; 4585 struct i915_vma *vma, *next; 4586 4587 intel_runtime_pm_get(dev_priv); 4588 4589 trace_i915_gem_object_destroy(obj); 4590 4591 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4592 int ret; 4593 4594 vma->pin_count = 0; 4595 ret = i915_vma_unbind(vma); 4596 if (WARN_ON(ret == -ERESTARTSYS)) { 4597 bool was_interruptible; 4598 4599 was_interruptible = dev_priv->mm.interruptible; 4600 dev_priv->mm.interruptible = false; 4601 4602 WARN_ON(i915_vma_unbind(vma)); 4603 4604 dev_priv->mm.interruptible = was_interruptible; 4605 } 4606 } 4607 4608 i915_gem_object_detach_phys(obj); 4609 4610 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 4611 * before progressing. 
*/ 4612 if (obj->stolen) 4613 i915_gem_object_unpin_pages(obj); 4614 4615 if (WARN_ON(obj->pages_pin_count)) 4616 obj->pages_pin_count = 0; 4617 if (discard_backing_storage(obj)) 4618 obj->madv = I915_MADV_DONTNEED; 4619 i915_gem_object_put_pages(obj); 4620 i915_gem_object_free_mmap_offset(obj); 4621 4622 BUG_ON(obj->pages); 4623 4624 #if 0 4625 if (obj->base.import_attach) 4626 drm_prime_gem_destroy(&obj->base, NULL); 4627 #endif 4628 4629 if (obj->ops->release) 4630 obj->ops->release(obj); 4631 4632 drm_gem_object_release(&obj->base); 4633 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4634 4635 kfree(obj->bit_17); 4636 i915_gem_object_free(obj); 4637 4638 intel_runtime_pm_put(dev_priv); 4639 } 4640 4641 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 4642 struct i915_address_space *vm) 4643 { 4644 struct i915_vma *vma; 4645 list_for_each_entry(vma, &obj->vma_list, vma_link) 4646 if (vma->vm == vm) 4647 return vma; 4648 4649 return NULL; 4650 } 4651 4652 void i915_gem_vma_destroy(struct i915_vma *vma) 4653 { 4654 WARN_ON(vma->node.allocated); 4655 4656 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 4657 if (!list_empty(&vma->exec_list)) 4658 return; 4659 4660 list_del(&vma->vma_link); 4661 4662 kfree(vma); 4663 } 4664 4665 static void 4666 i915_gem_stop_ringbuffers(struct drm_device *dev) 4667 { 4668 struct drm_i915_private *dev_priv = dev->dev_private; 4669 struct intel_engine_cs *ring; 4670 int i; 4671 4672 for_each_ring(ring, dev_priv, i) 4673 intel_stop_ring_buffer(ring); 4674 } 4675 4676 int 4677 i915_gem_suspend(struct drm_device *dev) 4678 { 4679 struct drm_i915_private *dev_priv = dev->dev_private; 4680 int ret = 0; 4681 4682 mutex_lock(&dev->struct_mutex); 4683 if (dev_priv->ums.mm_suspended) 4684 goto err; 4685 4686 ret = i915_gpu_idle(dev); 4687 if (ret) 4688 goto err; 4689 4690 i915_gem_retire_requests(dev); 4691 4692 /* Under UMS, be paranoid and evict. */ 4693 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4694 i915_gem_evict_everything(dev); 4695 4696 i915_kernel_lost_context(dev); 4697 i915_gem_stop_ringbuffers(dev); 4698 4699 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4700 * We need to replace this with a semaphore, or something. 4701 * And not confound ums.mm_suspended! 4702 */ 4703 dev_priv->ums.mm_suspended = !drm_core_check_feature(dev, 4704 DRIVER_MODESET); 4705 mutex_unlock(&dev->struct_mutex); 4706 4707 del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); 4708 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4709 cancel_delayed_work_sync(&dev_priv->mm.idle_work); 4710 4711 return 0; 4712 4713 err: 4714 mutex_unlock(&dev->struct_mutex); 4715 return ret; 4716 } 4717 4718 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) 4719 { 4720 struct drm_device *dev = ring->dev; 4721 struct drm_i915_private *dev_priv = dev->dev_private; 4722 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 4723 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 4724 int i, ret; 4725 4726 if (!HAS_L3_DPF(dev) || !remap_info) 4727 return 0; 4728 4729 ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); 4730 if (ret) 4731 return ret; 4732 4733 /* 4734 * Note: We do not worry about the concurrent register cacheline hang 4735 * here because no other code should access these registers other than 4736 * at initialization time. 
4737 */ 4738 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4739 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 4740 intel_ring_emit(ring, reg_base + i); 4741 intel_ring_emit(ring, remap_info[i/4]); 4742 } 4743 4744 intel_ring_advance(ring); 4745 4746 return ret; 4747 } 4748 4749 void i915_gem_init_swizzling(struct drm_device *dev) 4750 { 4751 struct drm_i915_private *dev_priv = dev->dev_private; 4752 4753 if (INTEL_INFO(dev)->gen < 5 || 4754 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4755 return; 4756 4757 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4758 DISP_TILE_SURFACE_SWIZZLING); 4759 4760 if (IS_GEN5(dev)) 4761 return; 4762 4763 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4764 if (IS_GEN6(dev)) 4765 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4766 else if (IS_GEN7(dev)) 4767 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4768 else if (IS_GEN8(dev)) 4769 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 4770 else 4771 BUG(); 4772 } 4773 4774 static bool 4775 intel_enable_blt(struct drm_device *dev) 4776 { 4777 int revision; 4778 4779 if (!HAS_BLT(dev)) 4780 return false; 4781 4782 /* The blitter was dysfunctional on early prototypes */ 4783 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 4784 if (IS_GEN6(dev) && revision < 8) { 4785 DRM_INFO("BLT not supported on this pre-production hardware;" 4786 " graphics performance will be degraded.\n"); 4787 return false; 4788 } 4789 4790 return true; 4791 } 4792 4793 static int i915_gem_init_rings(struct drm_device *dev) 4794 { 4795 struct drm_i915_private *dev_priv = dev->dev_private; 4796 int ret; 4797 4798 ret = intel_init_render_ring_buffer(dev); 4799 if (ret) 4800 return ret; 4801 4802 if (HAS_BSD(dev)) { 4803 ret = intel_init_bsd_ring_buffer(dev); 4804 if (ret) 4805 goto cleanup_render_ring; 4806 } 4807 4808 if (intel_enable_blt(dev)) { 4809 ret = intel_init_blt_ring_buffer(dev); 4810 if (ret) 4811 goto cleanup_bsd_ring; 4812 } 4813 4814 if (HAS_VEBOX(dev)) { 4815 ret = intel_init_vebox_ring_buffer(dev); 4816 if (ret) 4817 goto cleanup_blt_ring; 4818 } 4819 4820 if (HAS_BSD2(dev)) { 4821 ret = intel_init_bsd2_ring_buffer(dev); 4822 if (ret) 4823 goto cleanup_vebox_ring; 4824 } 4825 4826 ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); 4827 if (ret) 4828 goto cleanup_bsd2_ring; 4829 4830 return 0; 4831 4832 cleanup_bsd2_ring: 4833 intel_cleanup_ring_buffer(&dev_priv->ring[VCS2]); 4834 cleanup_vebox_ring: 4835 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 4836 cleanup_blt_ring: 4837 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 4838 cleanup_bsd_ring: 4839 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4840 cleanup_render_ring: 4841 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4842 4843 return ret; 4844 } 4845 4846 int 4847 i915_gem_init_hw(struct drm_device *dev) 4848 { 4849 struct drm_i915_private *dev_priv = dev->dev_private; 4850 int ret, i; 4851 4852 #if 0 4853 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4854 return -EIO; 4855 #endif 4856 4857 if (dev_priv->ellc_size) 4858 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 4859 4860 if (IS_HASWELL(dev)) 4861 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 
4862 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 4863 4864 if (HAS_PCH_NOP(dev)) { 4865 if (IS_IVYBRIDGE(dev)) { 4866 u32 temp = I915_READ(GEN7_MSG_CTL); 4867 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 4868 I915_WRITE(GEN7_MSG_CTL, temp); 4869 } else if (INTEL_INFO(dev)->gen >= 7) { 4870 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 4871 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 4872 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 4873 } 4874 } 4875 4876 i915_gem_init_swizzling(dev); 4877 4878 ret = i915_gem_init_rings(dev); 4879 if (ret) 4880 return ret; 4881 4882 for (i = 0; i < NUM_L3_SLICES(dev); i++) 4883 i915_gem_l3_remap(&dev_priv->ring[RCS], i); 4884 4885 /* 4886 * XXX: Contexts should only be initialized once. Doing a switch to the 4887 * default context switch however is something we'd like to do after 4888 * reset or thaw (the latter may not actually be necessary for HW, but 4889 * goes with our code better). Context switching requires rings (for 4890 * the do_switch), but before enabling PPGTT. So don't move this. 4891 */ 4892 ret = i915_gem_context_enable(dev_priv); 4893 if (ret && ret != -EIO) { 4894 DRM_ERROR("Context enable failed %d\n", ret); 4895 i915_gem_cleanup_ringbuffer(dev); 4896 } 4897 4898 return ret; 4899 } 4900 4901 int i915_gem_init(struct drm_device *dev) 4902 { 4903 struct drm_i915_private *dev_priv = dev->dev_private; 4904 int ret; 4905 4906 mutex_lock(&dev->struct_mutex); 4907 4908 if (IS_VALLEYVIEW(dev)) { 4909 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 4910 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 4911 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 4912 VLV_GTLC_ALLOWWAKEACK), 10)) 4913 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 4914 } 4915 4916 i915_gem_init_userptr(dev); 4917 i915_gem_init_global_gtt(dev); 4918 4919 ret = i915_gem_context_init(dev); 4920 if (ret) { 4921 mutex_unlock(&dev->struct_mutex); 4922 return ret; 4923 } 4924 4925 ret = i915_gem_init_hw(dev); 4926 if (ret == -EIO) { 4927 /* Allow ring initialisation to fail by marking the GPU as 4928 * wedged. But we only want to do this where the GPU is angry, 4929 * for all other failure, such as an allocation failure, bail. 4930 */ 4931 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 4932 atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 4933 ret = 0; 4934 } 4935 mutex_unlock(&dev->struct_mutex); 4936 4937 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
*/ 4938 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4939 dev_priv->dri1.allow_batchbuffer = 1; 4940 return ret; 4941 } 4942 4943 void 4944 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4945 { 4946 struct drm_i915_private *dev_priv = dev->dev_private; 4947 struct intel_engine_cs *ring; 4948 int i; 4949 4950 for_each_ring(ring, dev_priv, i) 4951 intel_cleanup_ring_buffer(ring); 4952 } 4953 4954 int 4955 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4956 struct drm_file *file_priv) 4957 { 4958 struct drm_i915_private *dev_priv = dev->dev_private; 4959 int ret; 4960 4961 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4962 return 0; 4963 4964 if (i915_reset_in_progress(&dev_priv->gpu_error)) { 4965 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4966 atomic_set(&dev_priv->gpu_error.reset_counter, 0); 4967 } 4968 4969 mutex_lock(&dev->struct_mutex); 4970 dev_priv->ums.mm_suspended = 0; 4971 4972 ret = i915_gem_init_hw(dev); 4973 if (ret != 0) { 4974 mutex_unlock(&dev->struct_mutex); 4975 return ret; 4976 } 4977 4978 BUG_ON(!list_empty(&dev_priv->gtt.base.active_list)); 4979 4980 ret = drm_irq_install(dev, dev->irq); 4981 if (ret) 4982 goto cleanup_ringbuffer; 4983 mutex_unlock(&dev->struct_mutex); 4984 4985 return 0; 4986 4987 cleanup_ringbuffer: 4988 i915_gem_cleanup_ringbuffer(dev); 4989 dev_priv->ums.mm_suspended = 1; 4990 mutex_unlock(&dev->struct_mutex); 4991 4992 return ret; 4993 } 4994 4995 int 4996 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4997 struct drm_file *file_priv) 4998 { 4999 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5000 return 0; 5001 5002 mutex_lock(&dev->struct_mutex); 5003 drm_irq_uninstall(dev); 5004 mutex_unlock(&dev->struct_mutex); 5005 5006 return i915_gem_suspend(dev); 5007 } 5008 5009 void 5010 i915_gem_lastclose(struct drm_device *dev) 5011 { 5012 int ret; 5013 5014 if (drm_core_check_feature(dev, DRIVER_MODESET)) 5015 return; 5016 5017 ret = i915_gem_suspend(dev); 5018 if (ret) 5019 DRM_ERROR("failed to idle hardware: %d\n", ret); 5020 } 5021 5022 static void 5023 init_ring_lists(struct intel_engine_cs *ring) 5024 { 5025 INIT_LIST_HEAD(&ring->active_list); 5026 INIT_LIST_HEAD(&ring->request_list); 5027 } 5028 5029 void i915_init_vm(struct drm_i915_private *dev_priv, 5030 struct i915_address_space *vm) 5031 { 5032 if (!i915_is_ggtt(vm)) 5033 drm_mm_init(&vm->mm, vm->start, vm->total); 5034 vm->dev = dev_priv->dev; 5035 INIT_LIST_HEAD(&vm->active_list); 5036 INIT_LIST_HEAD(&vm->inactive_list); 5037 INIT_LIST_HEAD(&vm->global_link); 5038 list_add_tail(&vm->global_link, &dev_priv->vm_list); 5039 } 5040 5041 void 5042 i915_gem_load(struct drm_device *dev) 5043 { 5044 struct drm_i915_private *dev_priv = dev->dev_private; 5045 int i; 5046 5047 INIT_LIST_HEAD(&dev_priv->vm_list); 5048 i915_init_vm(dev_priv, &dev_priv->gtt.base); 5049 5050 INIT_LIST_HEAD(&dev_priv->context_list); 5051 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5052 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5053 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5054 for (i = 0; i < I915_NUM_RINGS; i++) 5055 init_ring_lists(&dev_priv->ring[i]); 5056 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5057 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5058 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5059 i915_gem_retire_work_handler); 5060 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5061 i915_gem_idle_work_handler); 5062 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5063 5064 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 5065 if 
(!drm_core_check_feature(dev, DRIVER_MODESET) && IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	i915_gem_restore_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

#if 0
	dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
	dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
	/* Old FreeBSD code */
	dev_priv->mm.inactive_shrinker = EVENTHANDLER_REGISTER(vm_lowmem,
	    i915_gem_inactive_shrink, dev, EVENTHANDLER_PRI_ANY);
#endif
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	cancel_delayed_work_sync(&file_priv->mm.idle_work);

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
5112 */ 5113 spin_lock(&file_priv->mm.lock); 5114 while (!list_empty(&file_priv->mm.request_list)) { 5115 struct drm_i915_gem_request *request; 5116 5117 request = list_first_entry(&file_priv->mm.request_list, 5118 struct drm_i915_gem_request, 5119 client_list); 5120 list_del(&request->client_list); 5121 request->file_priv = NULL; 5122 } 5123 spin_unlock(&file_priv->mm.lock); 5124 } 5125 5126 int 5127 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 5128 vm_ooffset_t foff, struct ucred *cred, u_short *color) 5129 { 5130 *color = 0; /* XXXKIB */ 5131 return (0); 5132 } 5133 5134 void 5135 i915_gem_pager_dtor(void *handle) 5136 { 5137 struct drm_gem_object *obj; 5138 struct drm_device *dev; 5139 5140 obj = handle; 5141 dev = obj->dev; 5142 5143 mutex_lock(&dev->struct_mutex); 5144 drm_gem_free_mmap_offset(obj); 5145 i915_gem_release_mmap(to_intel_bo(obj)); 5146 drm_gem_object_unreference(obj); 5147 mutex_unlock(&dev->struct_mutex); 5148 } 5149 5150 static void 5151 i915_gem_file_idle_work_handler(struct work_struct *work) 5152 { 5153 struct drm_i915_file_private *file_priv = 5154 container_of(work, typeof(*file_priv), mm.idle_work.work); 5155 5156 atomic_set(&file_priv->rps_wait_boost, false); 5157 } 5158 5159 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5160 { 5161 struct drm_i915_file_private *file_priv; 5162 int ret; 5163 5164 DRM_DEBUG_DRIVER("\n"); 5165 5166 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5167 if (!file_priv) 5168 return -ENOMEM; 5169 5170 file->driver_priv = file_priv; 5171 file_priv->dev_priv = dev->dev_private; 5172 file_priv->file = file; 5173 5174 spin_init(&file_priv->mm.lock, "i915_priv"); 5175 INIT_LIST_HEAD(&file_priv->mm.request_list); 5176 INIT_DELAYED_WORK(&file_priv->mm.idle_work, 5177 i915_gem_file_idle_work_handler); 5178 5179 ret = i915_gem_context_open(dev, file); 5180 if (ret) 5181 kfree(file_priv); 5182 5183 return ret; 5184 } 5185 5186 #if 0 5187 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 5188 { 5189 if (!mutex_is_locked(mutex)) 5190 return false; 5191 5192 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 5193 return mutex->owner == task; 5194 #else 5195 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 5196 return false; 5197 #endif 5198 } 5199 #endif 5200 5201 #if 0 5202 static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) 5203 { 5204 if (!mutex_trylock(&dev->struct_mutex)) { 5205 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 5206 return false; 5207 5208 if (to_i915(dev)->mm.shrinker_no_lock_stealing) 5209 return false; 5210 5211 *unlock = false; 5212 } else 5213 *unlock = true; 5214 5215 return true; 5216 } 5217 5218 static int num_vma_bound(struct drm_i915_gem_object *obj) 5219 { 5220 struct i915_vma *vma; 5221 int count = 0; 5222 5223 list_for_each_entry(vma, &obj->vma_list, vma_link) 5224 if (drm_mm_node_allocated(&vma->node)) 5225 count++; 5226 5227 return count; 5228 } 5229 5230 static unsigned long 5231 i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc) 5232 { 5233 struct drm_i915_private *dev_priv = 5234 container_of(shrinker, 5235 struct drm_i915_private, 5236 mm.inactive_shrinker); 5237 struct drm_device *dev = dev_priv->dev; 5238 struct drm_i915_gem_object *obj; 5239 unsigned long count; 5240 bool unlock; 5241 5242 if (!i915_gem_shrinker_lock(dev, &unlock)) 5243 return 0; 5244 5245 count = 0; 5246 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) 5247 if 
(obj->pages_pin_count == 0) 5248 count += obj->base.size >> PAGE_SHIFT; 5249 5250 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 5251 if (!i915_gem_obj_is_pinned(obj) && 5252 obj->pages_pin_count == num_vma_bound(obj)) 5253 count += obj->base.size >> PAGE_SHIFT; 5254 } 5255 5256 if (unlock) 5257 mutex_unlock(&dev->struct_mutex); 5258 5259 return count; 5260 } 5261 #endif 5262 5263 /* All the new VM stuff */ 5264 unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, 5265 struct i915_address_space *vm) 5266 { 5267 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5268 struct i915_vma *vma; 5269 5270 if (!dev_priv->mm.aliasing_ppgtt || 5271 vm == &dev_priv->mm.aliasing_ppgtt->base) 5272 vm = &dev_priv->gtt.base; 5273 5274 BUG_ON(list_empty(&o->vma_list)); 5275 list_for_each_entry(vma, &o->vma_list, vma_link) { 5276 if (vma->vm == vm) 5277 return vma->node.start; 5278 5279 } 5280 return -1; 5281 } 5282 5283 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5284 struct i915_address_space *vm) 5285 { 5286 struct i915_vma *vma; 5287 5288 list_for_each_entry(vma, &o->vma_list, vma_link) 5289 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5290 return true; 5291 5292 return false; 5293 } 5294 5295 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5296 { 5297 struct i915_vma *vma; 5298 5299 list_for_each_entry(vma, &o->vma_list, vma_link) 5300 if (drm_mm_node_allocated(&vma->node)) 5301 return true; 5302 5303 return false; 5304 } 5305 5306 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5307 struct i915_address_space *vm) 5308 { 5309 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5310 struct i915_vma *vma; 5311 5312 if (!dev_priv->mm.aliasing_ppgtt || 5313 vm == &dev_priv->mm.aliasing_ppgtt->base) 5314 vm = &dev_priv->gtt.base; 5315 5316 BUG_ON(list_empty(&o->vma_list)); 5317 5318 list_for_each_entry(vma, &o->vma_list, vma_link) 5319 if (vma->vm == vm) 5320 return vma->node.size; 5321 5322 return 0; 5323 } 5324 5325 #if 0 5326 static unsigned long 5327 i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) 5328 { 5329 struct drm_i915_private *dev_priv = 5330 container_of(shrinker, struct drm_i915_private, mm.shrinker); 5331 struct drm_device *dev = dev_priv->dev; 5332 unsigned long freed; 5333 bool unlock; 5334 5335 if (!i915_gem_shrinker_lock(dev, &unlock)) 5336 return SHRINK_STOP; 5337 5338 freed = i915_gem_purge(dev_priv, sc->nr_to_scan); 5339 if (freed < sc->nr_to_scan) 5340 freed += __i915_gem_shrink(dev_priv, 5341 sc->nr_to_scan - freed, 5342 false); 5343 if (freed < sc->nr_to_scan) 5344 freed += i915_gem_shrink_all(dev_priv); 5345 5346 if (unlock) 5347 mutex_unlock(&dev->struct_mutex); 5348 5349 return freed; 5350 } 5351 #endif 5352 5353 struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) 5354 { 5355 struct i915_vma *vma; 5356 5357 /* This WARN has probably outlived its usefulness (callers already 5358 * WARN if they don't find the GGTT vma they expect). When removing, 5359 * remember to remove the pre-check in is_pin_display() as well */ 5360 if (WARN_ON(list_empty(&obj->vma_list))) 5361 return NULL; 5362 5363 vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); 5364 if (vma->vm != obj_to_ggtt(obj)) 5365 return NULL; 5366 5367 return vma; 5368 } 5369
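
/*
 * Illustrative usage sketch (not part of the driver build): a typical
 * in-kernel caller of the GGTT pin helpers defined above would bracket its
 * aperture access roughly as follows.  The 4096-byte alignment and the
 * error-handling shape are assumptions for the example, not taken from the
 * original source; only the function names mirror this file.
 *
 *	ret = i915_mutex_lock_interruptible(dev);
 *	if (ret)
 *		return ret;
 *	ret = i915_gem_obj_ggtt_pin(obj, 4096, PIN_MAPPABLE);
 *	if (ret == 0) {
 *		offset = i915_gem_obj_ggtt_offset(obj);
 *		... access the object through the mappable aperture ...
 *		i915_gem_object_ggtt_unpin(obj);
 *	}
 *	mutex_unlock(&dev->struct_mutex);
 */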