/*	$NetBSD: i915_gem.c,v 1.61 2020/02/23 15:46:40 ad Exp $	*/

/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: i915_gem.c,v 1.61 2020/02/23 15:46:40 ad Exp $");

#ifdef __NetBSD__
#if 0				/* XXX uvmhist option? */
#include "opt_uvmhist.h"
#endif

#include <sys/types.h>
#include <sys/param.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_fault.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_prot.h>

#include <drm/bus_dma_hacks.h>
#endif

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <asm/page.h>
#include <asm/cpufeature.h>

#include <linux/nbsd-namespace.h>

#define RQ_BUG_ON(expr)

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);

static bool cpu_cache_is_coherent(struct drm_device *dev,
				  enum i915_cache_level level)
{
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
}

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
		return true;

	return obj->pin_display;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

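/*
 * Note: the NetBSD wait below uses DRM_SPIN_TIMED_WAIT_UNTIL under
 * error->reset_lock; its result follows the convention checked right
 * after the #ifdef block: negative errno on failure, zero on timeout,
 * positive on success.
 */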
static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

#define EXIT_COND (!i915_reset_in_progress(error) || \
		   i915_terminally_wedged(error))
	if (EXIT_COND)
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
#ifdef __NetBSD__
	spin_lock(&error->reset_lock);
	DRM_SPIN_TIMED_WAIT_UNTIL(ret, &error->reset_queue, &error->reset_lock,
	    10*HZ, EXIT_COND);
	spin_unlock(&error->reset_lock);
#else
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       EXIT_COND,
					       10*HZ);
#endif
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	}
#undef EXIT_COND

	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_gtt *ggtt = &dev_priv->gtt;
	struct i915_vma *vma;
	size_t pinned;

	pinned = 0;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
		if (vma->pin_count)
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
		if (vma->pin_count)
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = dev_priv->gtt.base.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
#ifndef __NetBSD__
	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
#endif
	char *vaddr = obj->phys_handle->vaddr;
#ifndef __NetBSD__
	struct sg_table *st;
	struct scatterlist *sg;
#endif
	int i;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct page *page;
		char *src;

#ifdef __NetBSD__
		struct pglist pages = TAILQ_HEAD_INITIALIZER(pages);
		int ret;
		/* XXX errno NetBSD->Linux */
		ret = -uvm_obj_wirepages(obj->base.filp, i*PAGE_SIZE,
		    (i + 1)*PAGE_SIZE, &pages);
		if (ret)
			return ret;
		page = container_of(TAILQ_FIRST(&pages), struct page, p_vmp);
#else
		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);
#endif

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

#ifdef __NetBSD__
		uvm_obj_unwirepages(obj->base.filp, i*PAGE_SIZE,
		    (i + 1)*PAGE_SIZE);
#else
		page_cache_release(page);
#endif
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(obj->base.dev);

#ifdef __NetBSD__
	obj->pages = obj->phys_handle->dmah_map;
#else
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = obj->phys_handle->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->pages = st;
#endif
	return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
	int ret;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

	if (obj->dirty) {
#ifndef __NetBSD__
		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
#endif
		const char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct page *page;
			char *dst;

#ifdef __NetBSD__
			struct pglist pages = TAILQ_HEAD_INITIALIZER(pages);
			/* XXX errno NetBSD->Linux */
			ret = -uvm_obj_wirepages(obj->base.filp,
			    i*PAGE_SIZE, (i + 1)*PAGE_SIZE, &pages);
			if (ret)
				continue;
			page = container_of(TAILQ_FIRST(&pages), struct page,
			    p_vmp);
#endif

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
#ifdef __NetBSD__
			/* XXX mark page accessed */
			uvm_obj_unwirepages(obj->base.filp, i*PAGE_SIZE,
			    (i+1)*PAGE_SIZE);
#else
			if (obj->madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			page_cache_release(page);
#endif
			vaddr += PAGE_SIZE;
		}
		obj->dirty = 0;
	}

#ifdef __NetBSD__
	obj->pages = NULL;
#else
	sg_free_table(obj->pages);
	kfree(obj->pages);
#endif
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma, *next;
	int ret;

	drm_gem_object_reference(&obj->base);
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
		if (i915_vma_unbind(vma))
			break;

	ret = i915_gem_object_put_pages(obj);
	drm_gem_object_unreference(&obj->base);

	return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
			    int align)
{
	drm_dma_handle_t *phys;
	int ret;

	if (obj->phys_handle) {
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
			return -EBUSY;

		return 0;
	}

	if (obj->madv != I915_MADV_WILLNEED)
		return -EFAULT;

	if (obj->base.filp == NULL)
		return -EINVAL;

	ret = drop_pages(obj);
	if (ret)
		return ret;

	/* create a new object */
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
	if (!phys)
		return -ENOMEM;

	obj->phys_handle = phys;
	obj->ops = &i915_gem_phys_ops;

	return i915_gem_object_get_pages(obj);
}

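/*
 * Write path for objects backed by a contiguous phys allocation
 * (obj->phys_handle): the caller's data is copied straight into the
 * kernel mapping while the domain is managed by hand, then the range
 * is clflushed and the chipset flushed so the GPU observes the write.
 */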
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	struct drm_device *dev = obj->base.dev;
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);
	int ret = 0;

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	ret = i915_gem_object_wait_rendering(obj, false);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten) {
			ret = -EFAULT;
			goto out;
		}
	}

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(dev);

out:
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

void *i915_gem_object_alloc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	kmem_cache_free(dev_priv->objects, obj);
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(&obj->base);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

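/*
 * Both the dumb-buffer path above and the GEM create ioctl below
 * funnel into i915_gem_create(), which rounds the requested size up
 * to page granularity before allocating the object and its handle.
 */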
/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;

	if (!obj->base.filp)
		return -EINVAL;

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
							obj->cache_level);
		ret = i915_gem_object_wait_rendering(obj, true);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	return ret;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
#ifdef __NetBSD__		/* XXX atomic shmem fast path */
	return -EFAULT;
#else
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
#endif
}

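/*
 * The pread/pwrite fast paths use atomic kmaps and may bail out with
 * -EFAULT (always, on NetBSD); callers then drop struct_mutex and fall
 * back to the sleepable slow paths below, which also handle bit-17
 * swizzled pages.
 */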
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	loff_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
#ifndef __NetBSD__		/* XXX */
	int prefaulted = 0;
#endif
	int needs_clflush = 0;
#ifndef __NetBSD__
	struct sg_page_iter sg_iter;
#endif

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	offset = args->offset;

#ifdef __NetBSD__
	while (0 < remain)
#else
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT)
#endif
	{
#ifdef __NetBSD__
		struct page *const page = i915_gem_object_get_page(obj,
		    atop(offset));
#else
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;
#endif

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		mutex_unlock(&dev->struct_mutex);
#ifndef __NetBSD__
		if (likely(!i915.prefault_disable) && !prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}
#endif
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *gobj;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	gobj = drm_gem_object_lookup(dev, file, args->handle);
	if (gobj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}
	obj = to_intel_bo(gobj);

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
#ifdef __NetBSD__		/* XXX atomic shmem fast path */
	return -EFAULT;
#else
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
#endif
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length, ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;

	intel_fb_obj_invalidate(obj, ORIGIN_GTT);

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
		 */
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_flush;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out_flush:
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
	i915_gem_object_ggtt_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
static int
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
#ifdef __NetBSD__
	return -EFAULT;
#else
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
					user_data, page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
#endif
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(page);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	loff_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
#ifndef __NetBSD__
	struct sg_page_iter sg_iter;
	int flush_mask = boot_cpu_data.x86_clflush_size - 1;
#else
	int flush_mask = cpu_info_primary.ci_cflush_lsize - 1;
#endif

	user_data = to_user_ptr(args->data_ptr);
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		needs_clflush_after = cpu_write_needs_clflush(obj);
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
			return ret;
	}
	/* Same trick applies to invalidate partially written cachelines read
	 * before writing. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
		needs_clflush_before =
			!cpu_cache_is_coherent(dev, obj->cache_level);

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

#ifdef __NetBSD__
	while (0 < remain)
#else
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
			 offset >> PAGE_SHIFT)
#endif
	{
#ifdef __NetBSD__
		struct page *const page = i915_gem_object_get_page(obj,
		    atop(offset));
#else
		struct page *page = sg_page_iter_page(&sg_iter);

		if (remain <= 0)
			break;
#endif

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing.  Don't
		 * overcomplicate things and flush the entire patch. */
		const int partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length) & flush_mask);

		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		mutex_unlock(&dev->struct_mutex);
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		mutex_lock(&dev->struct_mutex);

		if (ret)
			goto out;

next_page:
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/*
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
		 * cachelines in-line while writing and the object moved
		 * out of the cpu write domain while we've dropped the lock.
		 */
		if (!needs_clflush_after &&
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			if (i915_gem_clflush_object(obj, obj->pin_display))
				needs_clflush_after = true;
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);
	else
		obj->cache_dirty = true;

	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *gobj;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

#ifndef __NetBSD__		/* XXX prefault */
	if (likely(!i915.prefault_disable)) {
		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
						   args->size);
		if (ret)
			return -EFAULT;
	}
#endif

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto put_rpm;

	gobj = drm_gem_object_lookup(dev, file, args->handle);
	if (gobj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}
	obj = to_intel_bo(gobj);

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	/* prime objects have no backing filp to GEM pread/pwrite
	 * pages from.
	 */
	if (!obj->base.filp) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
	    cpu_write_needs_clflush(obj)) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
	}

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
put_rpm:
	intel_runtime_pm_put(dev_priv);

	return ret;
}

int
i915_gem_check_wedge(struct i915_gpu_error *error,
		     bool interruptible)
{
	if (i915_reset_in_progress(error)) {
		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but the reset failed ... */
		if (i915_terminally_wedged(error))
			return -EIO;

		/*
		 * Check if GPU Reset is in progress - we need intel_ring_begin
		 * to work properly to reinit the hw state while the gpu is
		 * still marked as reset-in-progress. Handle this with a flag.
		 */
		if (!error->reload_in_reset)
			return -EAGAIN;
	}

	return 0;
}

#ifndef __NetBSD__
static void fake_irq(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
#endif

static bool missed_irq(struct drm_i915_private *dev_priv,
		       struct intel_engine_cs *ring)
{
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}

#ifndef __NetBSD__
static unsigned long local_clock_us(unsigned *cpu)
{
	unsigned long t;

	/* Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned cpu)
{
	unsigned this_cpu;

	if (time_after(local_clock_us(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}
#endif

static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
{
#ifndef __NetBSD__
	unsigned long timeout;
	unsigned cpu;
#endif

	/* When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

	if (req->ring->irq_refcount)
		return -EBUSY;

	/* Only spin if we know the GPU is processing this request */
	if (!i915_gem_request_started(req, true))
		return -EAGAIN;

#ifndef __NetBSD__		/* XXX No local clock in usec. */
	timeout = local_clock_us(&cpu) + 5;
	while (!need_resched()) {
		if (i915_gem_request_completed(req, true))
			return 0;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout, cpu))
			break;

		cpu_relax_lowlatency();
	}
#endif

	if (i915_gem_request_completed(req, false))
		return 0;

	return -EAGAIN;
}

/**
 * __i915_wait_request - wait until execution of request has finished
 * @req: duh!
 * @reset_counter: reset sequence associated with the given request
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Note: It is of utmost importance that the passed in seqno and reset_counter
 * values have been read by the caller in an smp safe manner. Where read-side
 * locks are involved, it is sufficient to read the reset_counter before
 * unlocking the lock that protects the seqno. For lockless tricks, the
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
 * inserted.
 *
 * Returns 0 if the request was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
 */
int __i915_wait_request(struct drm_i915_gem_request *req,
			unsigned reset_counter,
			bool interruptible,
			s64 *timeout,
			struct intel_rps_client *rps)
{
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const bool irq_test_in_progress =
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
#ifdef __NetBSD__
	int state = 0;
	bool wedged;
#else
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	DEFINE_WAIT(wait);
	unsigned long timeout_expire;
#endif
	s64 before, now;
	int ret;

	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");

	if (list_empty(&req->list))
		return 0;

	if (i915_gem_request_completed(req, true))
		return 0;

#ifndef __NetBSD__
	timeout_expire = 0;
	if (timeout) {
		if (WARN_ON(*timeout < 0))
			return -EINVAL;

		if (*timeout == 0)
			return -ETIME;

		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
	}
#endif

	if (INTEL_INFO(dev_priv)->gen >= 6)
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);

	/* Record current time in case interrupted by signal, or wedged */
	trace_i915_gem_request_wait_begin(req);
	before = ktime_get_raw_ns();

	/* Optimistic spin for the next jiffie before touching IRQs */
	ret = __i915_spin_request(req, state);
	if (ret == 0)
		goto out;

	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
		ret = -ENODEV;
		goto out;
	}

#ifdef __NetBSD__
#  define EXIT_COND							\
	((wedged = (reset_counter !=					\
		atomic_read(&dev_priv->gpu_error.reset_counter))) ||	\
	    i915_gem_request_completed(req, false))
	spin_lock(&dev_priv->irq_lock);
	if (timeout) {
		int ticks = missed_irq(dev_priv, ring) ? 1 :
		    nsecs_to_jiffies_timeout(*timeout);
		if (interruptible) {
			DRM_SPIN_TIMED_WAIT_UNTIL(ret, &ring->irq_queue,
			    &dev_priv->irq_lock, ticks, EXIT_COND);
		} else {
			DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
			    &dev_priv->irq_lock, ticks, EXIT_COND);
		}
		if (ret < 0)		/* Failure: return negative error as is. */
			;
		else if (ret == 0)	/* Timed out: return -ETIME. */
			ret = -ETIME;
		else			/* Succeeded (ret > 0): return 0. */
			ret = 0;
	} else {
		if (interruptible) {
			DRM_SPIN_WAIT_UNTIL(ret, &ring->irq_queue,
			    &dev_priv->irq_lock, EXIT_COND);
		} else {
			DRM_SPIN_WAIT_NOINTR_UNTIL(ret, &ring->irq_queue,
			    &dev_priv->irq_lock, EXIT_COND);
		}
		/* ret is negative on failure or zero on success. */
	}
	spin_unlock(&dev_priv->irq_lock);
	if (wedged) {
		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
		if (ret == 0)
			ret = -EAGAIN;
	}
#else
	for (;;) {
		struct timer_list timer;

		prepare_to_wait(&ring->irq_queue, &wait, state);

		/* We need to check whether any gpu reset happened in between
		 * the caller grabbing the seqno and now ... */
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
			 * is truly gone. */
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
			if (ret == 0)
				ret = -EAGAIN;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (timeout && time_after_eq(jiffies, timeout_expire)) {
			ret = -ETIME;
			break;
		}

		timer.function = NULL;
		if (timeout || missed_irq(dev_priv, ring)) {
			unsigned long expire;

			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
			expire = missed_irq(dev_priv, ring) ?
			    jiffies + 1 : timeout_expire;
			mod_timer(&timer, expire);
		}

		io_schedule();

		if (timer.function) {
			del_singleshot_timer_sync(&timer);
			destroy_timer_on_stack(&timer);
		}
	}
#endif
	if (!irq_test_in_progress)
		ring->irq_put(ring);

#ifndef __NetBSD__
	finish_wait(&ring->irq_queue, &wait);
#endif

out:
	now = ktime_get_raw_ns();
	trace_i915_gem_request_wait_end(req);

	if (timeout) {
		s64 tres = *timeout - (now - before);

		*timeout = tres < 0 ? 0 : tres;

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
			*timeout = 0;
	}

	return ret;
}

int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
				   struct drm_file *file)
{
	struct drm_i915_private *dev_private __unused;
	struct drm_i915_file_private *file_priv;

	WARN_ON(!req || !file || req->file_priv);

	if (!req || !file)
		return -EINVAL;

	if (req->file_priv)
		return -EINVAL;

	dev_private = req->ring->dev->dev_private;
	file_priv = file->driver_priv;

	spin_lock(&file_priv->mm.lock);
	req->file_priv = file_priv;
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
	spin_unlock(&file_priv->mm.lock);

#ifndef __NetBSD__
	req->pid = get_pid(task_pid(current));
#endif

	return 0;
}

static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
{
	struct drm_i915_file_private *file_priv = request->file_priv;

	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);

#ifndef __NetBSD__
	put_pid(request->pid);
	request->pid = NULL;
#endif
}

static void i915_gem_request_retire(struct drm_i915_gem_request *request)
{
	trace_i915_gem_request_retire(request);

	/* We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	request->ringbuf->last_retired_head = request->postfix;

	list_del_init(&request->list);
	i915_gem_request_remove_from_client(request);

	i915_gem_request_unreference(request);
}

static void
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->ring;
	struct drm_i915_gem_request *tmp;

	lockdep_assert_held(&engine->dev->struct_mutex);

	if (list_empty(&req->list))
		return;

	do {
		tmp = list_first_entry(&engine->request_list,
				       typeof(*tmp), list);

		i915_gem_request_retire(tmp);
	} while (tmp != req);

	WARN_ON(i915_verify_lists(engine->dev));
}

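/*
 * Note that on success i915_wait_request() below also retires the
 * request, and every request before it on the same engine, via
 * __i915_gem_request_retire__upto().
 */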
/**
 * Waits for a request to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_request(struct drm_i915_gem_request *req)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	bool interruptible;
	int ret;

	BUG_ON(req == NULL);

	dev = req->ring->dev;
	dev_priv = dev->dev_private;
	interruptible = dev_priv->mm.interruptible;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
	if (ret)
		return ret;

	ret = __i915_wait_request(req,
				  atomic_read(&dev_priv->gpu_error.reset_counter),
				  interruptible, NULL, NULL);
	if (ret)
		return ret;

	__i915_gem_request_retire__upto(req);
	return 0;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	int ret, i;

	if (!obj->active)
		return 0;

	if (readonly) {
		if (obj->last_write_req != NULL) {
			ret = i915_wait_request(obj->last_write_req);
			if (ret)
				return ret;

			i = obj->last_write_req->ring->id;
			if (obj->last_read_req[i] == obj->last_write_req)
				i915_gem_object_retire__read(obj, i);
			else
				i915_gem_object_retire__write(obj);
		}
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++) {
			if (obj->last_read_req[i] == NULL)
				continue;

			ret = i915_wait_request(obj->last_read_req[i]);
			if (ret)
				return ret;

			i915_gem_object_retire__read(obj, i);
		}
		RQ_BUG_ON(obj->active);
	}

	return 0;
}

static void
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
			       struct drm_i915_gem_request *req)
{
	int ring = req->ring->id;

	if (obj->last_read_req[ring] == req)
		i915_gem_object_retire__read(obj, ring);
	else if (obj->last_write_req == req)
		i915_gem_object_retire__write(obj);

	__i915_gem_request_retire__upto(req);
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    struct intel_rps_client *rps,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
	unsigned reset_counter;
	int ret, i, n = 0;

	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(!dev_priv->mm.interruptible);

	if (!obj->active)
		return 0;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
	if (ret)
		return ret;

	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);

	if (readonly) {
		struct drm_i915_gem_request *req;

		req = obj->last_write_req;
		if (req == NULL)
			return 0;

		requests[n++] = i915_gem_request_reference(req);
	} else {
		for (i = 0; i < I915_NUM_RINGS; i++) {
			struct drm_i915_gem_request *req;

			req = obj->last_read_req[i];
			if (req == NULL)
				continue;

			requests[n++] = i915_gem_request_reference(req);
		}
	}

	mutex_unlock(&dev->struct_mutex);
	for (i = 0; ret == 0 && i < n; i++)
		ret = __i915_wait_request(requests[i], reset_counter, true,
					  NULL, rps);
	mutex_lock(&dev->struct_mutex);

	for (i = 0; i < n; i++) {
		if (ret == 0)
			i915_gem_object_retire_request(obj, requests[i]);
		i915_gem_request_unreference(requests[i]);
	}

	return ret;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;
	return &fpriv->rps;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *gobj;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	gobj = drm_gem_object_lookup(dev, file, args->handle);
	if (gobj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}
	obj = to_intel_bo(gobj);

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
							  to_rps_client(file),
							  !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT)
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	else
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					write_domain == I915_GEM_DOMAIN_GTT ?
					ORIGIN_GTT : ORIGIN_CPU);

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *gobj;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	gobj = drm_gem_object_lookup(dev, file, args->handle);
	if (gobj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}
	obj = to_intel_bo(gobj);

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_display)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;
#ifdef __NetBSD__
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if ((dev_priv->quirks & QUIRK_NETBSD_VERSION_CALLED) == 0)
		args->flags = 0;
#endif

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !cpu_has_pat)
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->filp) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

#ifdef __NetBSD__
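	/*
	 * NetBSD: there is no vm_mmap(); instead the GEM object's uvm
	 * aobj is mapped into the caller's address space with uvm_map(),
	 * at a default address picked by the emulation's e_vm_default_addr.
	 */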
	/* Acquire a reference for uvm_map to consume. */
	uao_reference(obj->filp);
	addr = (*curproc->p_emul->e_vm_default_addr)(curproc,
	    (vaddr_t)curproc->p_vmspace->vm_daddr, args->size,
	    curproc->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
	/* XXX errno NetBSD->Linux */
	ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size,
	    obj->filp, args->offset, 0,
	    UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE),
		(VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL,
		0));
	if (ret) {
		uao_detach(obj->filp);
		drm_gem_object_unreference_unlocked(obj);
		return ret;
	}
	drm_gem_object_unreference_unlocked(obj);
#else
	addr = vm_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		down_write(&mm->mmap_sem);
		vma = find_vma(mm, addr);
		if (vma)
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);
	}
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;
#endif

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

#ifdef __NetBSD__		/* XXX gem gtt fault */
static int	i915_udv_fault(struct uvm_faultinfo *, vaddr_t,
		    struct vm_page **, int, int, vm_prot_t, int, paddr_t);

int
i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps,
    int npages, int centeridx, vm_prot_t access_type, int flags)
{
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct drm_gem_object *gem_obj =
	    container_of(uobj, struct drm_gem_object, gemo_uvmobj);
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	voff_t byte_offset;
	pgoff_t page_offset;
	int ret = 0;
	bool write = ISSET(access_type, VM_PROT_WRITE) ? 1 : 0;

	byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start));
	KASSERT(byte_offset <= obj->base.size);
	page_offset = (byte_offset >> PAGE_SHIFT);

	intel_runtime_pm_get(dev_priv);

	/* Thanks, uvm, but we don't need this lock. */
	rw_exit(uobj->vmobjlock);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;

	trace_i915_gem_object_fault(obj, page_offset, true, write);

	ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
	if (ret)
		goto unlock;

	if ((obj->cache_level != I915_CACHE_NONE) && !HAS_LLC(dev)) {
		ret = -EINVAL;
		goto unlock;
	}

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	/* XXX errno NetBSD->Linux */
	ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type,
	    flags,
	    (dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj)));
unpin:
	i915_gem_object_ggtt_unpin(obj);
unlock:
	mutex_unlock(&dev->struct_mutex);
out:
	rw_enter(uobj->vmobjlock, RW_WRITER);
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);

	/*
	 * Remap EINTR to success, so that we return to userland.
2058 * On the way out, we'll deliver the signal, and if the signal 2059 * is not fatal then the user code which faulted will most likely 2060 * fault again, and we'll come back here for another try. 2061 */ 2062 if (ret == -EINTR) 2063 ret = 0; 2064 /* XXX Deal with GPU hangs here... */ 2065 intel_runtime_pm_put(dev_priv); 2066 /* XXX errno Linux->NetBSD */ 2067 return -ret; 2068 } 2069 2070 /* 2071 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c. 2072 * 2073 * XXX pmap_enter_default instead of pmap_enter because of a problem 2074 * with using weak aliases in kernel modules or something. 2075 */ 2076 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned); 2077 2078 static int 2079 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 2080 int npages, int centeridx, vm_prot_t access_type, int flags, 2081 paddr_t gtt_paddr) 2082 { 2083 struct vm_map_entry *entry = ufi->entry; 2084 vaddr_t curr_va; 2085 off_t curr_offset; 2086 paddr_t paddr; 2087 u_int mmapflags; 2088 int lcv; 2089 vm_prot_t mapprot; 2090 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist); 2091 UVMHIST_LOG(maphist," flags=%jd", flags,0,0,0); 2092 2093 /* 2094 * we do not allow device mappings to be mapped copy-on-write 2095 * so we kill any attempt to do so here. 2096 */ 2097 2098 if (UVM_ET_ISCOPYONWRITE(entry)) { 2099 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%jx)", 2100 entry->etype, 0,0,0); 2101 return(EIO); 2102 } 2103 2104 /* 2105 * now we must determine the offset in udv to use and the VA to 2106 * use for pmap_enter. note that we always use orig_map's pmap 2107 * for pmap_enter (even if we have a submap). since virtual 2108 * addresses in a submap must match the main map, this is ok. 2109 */ 2110 2111 /* udv offset = (offset from start of entry) + entry's offset */ 2112 curr_offset = entry->offset + (vaddr - entry->start); 2113 /* pmap va = vaddr (virtual address of pps[0]) */ 2114 curr_va = vaddr; 2115 2116 /* 2117 * loop over the page range entering in as needed 2118 */ 2119 2120 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, 2121 curr_va += PAGE_SIZE) { 2122 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 2123 continue; 2124 2125 if (pps[lcv] == PGO_DONTCARE) 2126 continue; 2127 2128 paddr = (gtt_paddr + curr_offset); 2129 mmapflags = 0; 2130 mapprot = ufi->entry->protection; 2131 UVMHIST_LOG(maphist, 2132 " MAPPING: device: pm=0x%#jx, va=0x%jx, pa=0x%jx, at=%jd", 2133 (uintptr_t)ufi->orig_map->pmap, curr_va, paddr, mapprot); 2134 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot, 2135 PMAP_CANFAIL | mapprot | mmapflags) != 0) { 2136 /* 2137 * pmap_enter() didn't have the resource to 2138 * enter this mapping. Unlock everything, 2139 * wait for the pagedaemon to free up some 2140 * pages, and then tell uvm_fault() to start 2141 * the fault again. 2142 * 2143 * XXX Needs some rethinking for the PGO_ALLPAGES 2144 * XXX case. 2145 */ 2146 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ 2147 return ENOMEM; 2148 } 2149 } 2150 2151 pmap_update(ufi->orig_map->pmap); 2152 return 0; 2153 } 2154 #else 2155 /** 2156 * i915_gem_fault - fault a page into the GTT 2157 * @vma: VMA in question 2158 * @vmf: fault info 2159 * 2160 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 2161 * from userspace. 
The fault handler takes care of binding the object to 2162 * the GTT (if needed), allocating and programming a fence register (again, 2163 * only if needed based on whether the old reg is still valid or the object 2164 * is tiled) and inserting a new PTE into the faulting process. 2165 * 2166 * Note that the faulting process may involve evicting existing objects 2167 * from the GTT and/or fence registers to make room. So performance may 2168 * suffer if the GTT working set is large or there are few fence registers 2169 * left. 2170 */ 2171 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2172 { 2173 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 2174 struct drm_device *dev = obj->base.dev; 2175 struct drm_i915_private *dev_priv = dev->dev_private; 2176 struct i915_ggtt_view view = i915_ggtt_view_normal; 2177 pgoff_t page_offset; 2178 unsigned long pfn; 2179 int ret = 0; 2180 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2181 2182 intel_runtime_pm_get(dev_priv); 2183 2184 /* We don't use vmf->pgoff since that has the fake offset */ 2185 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 2186 PAGE_SHIFT; 2187 2188 ret = i915_mutex_lock_interruptible(dev); 2189 if (ret) 2190 goto out; 2191 2192 trace_i915_gem_object_fault(obj, page_offset, true, write); 2193 2194 /* Try to flush the object off the GPU first without holding the lock. 2195 * Upon reacquiring the lock, we will perform our sanity checks and then 2196 * repeat the flush holding the lock in the normal manner to catch cases 2197 * where we are gazumped. 2198 */ 2199 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write); 2200 if (ret) 2201 goto unlock; 2202 2203 /* Access to snoopable pages through the GTT is incoherent. */ 2204 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { 2205 ret = -EFAULT; 2206 goto unlock; 2207 } 2208 2209 /* Use a partial view if the object is bigger than the aperture. */ 2210 if (obj->base.size >= dev_priv->gtt.mappable_end && 2211 obj->tiling_mode == I915_TILING_NONE) { 2212 static const unsigned int chunk_size = 256; // 1 MiB 2213 2214 memset(&view, 0, sizeof(view)); 2215 view.type = I915_GGTT_VIEW_PARTIAL; 2216 view.params.partial.offset = rounddown(page_offset, chunk_size); 2217 view.params.partial.size = 2218 min_t(unsigned int, 2219 chunk_size, 2220 (vma->vm_end - vma->vm_start)/PAGE_SIZE - 2221 view.params.partial.offset); 2222 } 2223 2224 /* Now pin it into the GTT if needed */ 2225 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE); 2226 if (ret) 2227 goto unlock; 2228 2229 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2230 if (ret) 2231 goto unpin; 2232 2233 ret = i915_gem_object_get_fence(obj); 2234 if (ret) 2235 goto unpin; 2236 2237 /* Finally, remap it using the new GTT offset */ 2238 pfn = dev_priv->gtt.mappable_base + 2239 i915_gem_obj_ggtt_offset_view(obj, &view); 2240 pfn >>= PAGE_SHIFT; 2241 2242 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { 2243 /* Overriding existing pages in partial view does not cause 2244 * us any trouble as TLBs are still valid because the fault 2245 * is due to userspace losing part of the mapping or never 2246 * having accessed it before (at this partials' range). 
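 * As a concrete example of the partial-view arithmetic above: with the
 * 256-page (1 MiB) chunk_size, a fault at page_offset 1000 selects the
 * chunk starting at page 768, and the loop below (re)inserts PTEs for
 * up to 256 pages from there, clamped to the end of the mapping.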
2247 */ 2248 unsigned long base = vma->vm_start + 2249 (view.params.partial.offset << PAGE_SHIFT); 2250 unsigned int i; 2251 2252 for (i = 0; i < view.params.partial.size; i++) { 2253 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); 2254 if (ret) 2255 break; 2256 } 2257 2258 obj->fault_mappable = true; 2259 } else { 2260 if (!obj->fault_mappable) { 2261 unsigned long size = min_t(unsigned long, 2262 vma->vm_end - vma->vm_start, 2263 obj->base.size); 2264 int i; 2265 2266 for (i = 0; i < size >> PAGE_SHIFT; i++) { 2267 ret = vm_insert_pfn(vma, 2268 (unsigned long)vma->vm_start + i * PAGE_SIZE, 2269 pfn + i); 2270 if (ret) 2271 break; 2272 } 2273 2274 obj->fault_mappable = true; 2275 } else 2276 ret = vm_insert_pfn(vma, 2277 (unsigned long)vmf->virtual_address, 2278 pfn + page_offset); 2279 } 2280 unpin: 2281 i915_gem_object_ggtt_unpin_view(obj, &view); 2282 unlock: 2283 mutex_unlock(&dev->struct_mutex); 2284 out: 2285 switch (ret) { 2286 case -EIO: 2287 /* 2288 * We eat errors when the gpu is terminally wedged to avoid 2289 * userspace unduly crashing (gl has no provisions for mmaps to 2290 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2291 * and so needs to be reported. 2292 */ 2293 if (!i915_terminally_wedged(&dev_priv->gpu_error)) { 2294 ret = VM_FAULT_SIGBUS; 2295 break; 2296 } 2297 case -EAGAIN: 2298 /* 2299 * EAGAIN means the gpu is hung and we'll wait for the error 2300 * handler to reset everything when re-faulting in 2301 * i915_mutex_lock_interruptible. 2302 */ 2303 case 0: 2304 case -ERESTARTSYS: 2305 case -EINTR: 2306 case -EBUSY: 2307 /* 2308 * EBUSY is ok: this just means that another thread 2309 * already did the job. 2310 */ 2311 ret = VM_FAULT_NOPAGE; 2312 break; 2313 case -ENOMEM: 2314 ret = VM_FAULT_OOM; 2315 break; 2316 case -ENOSPC: 2317 case -EFAULT: 2318 ret = VM_FAULT_SIGBUS; 2319 break; 2320 default: 2321 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2322 ret = VM_FAULT_SIGBUS; 2323 break; 2324 } 2325 2326 intel_runtime_pm_put(dev_priv); 2327 return ret; 2328 } 2329 #endif 2330 2331 /** 2332 * i915_gem_release_mmap - remove physical page mappings 2333 * @obj: obj in question 2334 * 2335 * Preserve the reservation of the mmapping with the DRM core code, but 2336 * relinquish ownership of the pages back to the system. 2337 * 2338 * It is vital that we remove the page mapping if we have mapped a tiled 2339 * object through the GTT and then lose the fence register due to 2340 * resource pressure. Similarly if the object has been moved out of the 2341 * aperture, than pages mapped into userspace must be revoked. Removing the 2342 * mapping will then trigger a page fault on the next user access, allowing 2343 * fixup by i915_gem_fault(). 
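 * On NetBSD this is done by revoking every pmap entry for the object's
 * aperture pages with pmap_pv_protect(); the Linux path instead unmaps
 * the offset node with drm_vma_node_unmap(). Either way fault_mappable
 * is cleared, so the next access faults back into i915_gem_fault().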
2344 */ 2345 void 2346 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2347 { 2348 if (!obj->fault_mappable) 2349 return; 2350 2351 #ifdef __NetBSD__ /* XXX gem gtt fault */ 2352 { 2353 struct drm_device *const dev = obj->base.dev; 2354 struct drm_i915_private *const dev_priv = dev->dev_private; 2355 const paddr_t start = dev_priv->gtt.mappable_base + 2356 i915_gem_obj_ggtt_offset(obj); 2357 const size_t size = obj->base.size; 2358 const paddr_t end = start + size; 2359 paddr_t pa; 2360 2361 KASSERT((start & (PAGE_SIZE - 1)) == 0); 2362 KASSERT((size & (PAGE_SIZE - 1)) == 0); 2363 2364 for (pa = start; pa < end; pa += PAGE_SIZE) 2365 pmap_pv_protect(pa, VM_PROT_NONE); 2366 } 2367 #else 2368 drm_vma_node_unmap(&obj->base.vma_node, 2369 obj->base.dev->anon_inode->i_mapping); 2370 #endif 2371 obj->fault_mappable = false; 2372 } 2373 2374 void 2375 i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) 2376 { 2377 struct drm_i915_gem_object *obj; 2378 2379 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) 2380 i915_gem_release_mmap(obj); 2381 } 2382 2383 uint32_t 2384 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 2385 { 2386 uint32_t gtt_size; 2387 2388 if (INTEL_INFO(dev)->gen >= 4 || 2389 tiling_mode == I915_TILING_NONE) 2390 return size; 2391 2392 /* Previous chips need a power-of-two fence region when tiling */ 2393 if (INTEL_INFO(dev)->gen == 3) 2394 gtt_size = 1024*1024; 2395 else 2396 gtt_size = 512*1024; 2397 2398 while (gtt_size < size) 2399 gtt_size <<= 1; 2400 2401 return gtt_size; 2402 } 2403 2404 /** 2405 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 2406 * @obj: object to check 2407 * 2408 * Return the required GTT alignment for an object, taking into account 2409 * potential fence register mapping. 2410 */ 2411 uint32_t 2412 i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, 2413 int tiling_mode, bool fenced) 2414 { 2415 /* 2416 * Minimum alignment is 4k (GTT page size), but might be greater 2417 * if a fence register is needed for the object. 2418 */ 2419 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) || 2420 tiling_mode == I915_TILING_NONE) 2421 return 4096; 2422 2423 /* 2424 * Previous chips need to be aligned to the size of the smallest 2425 * fence register that can contain the object. 2426 */ 2427 return i915_gem_get_gtt_size(dev, size, tiling_mode); 2428 } 2429 2430 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2431 { 2432 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2433 int ret; 2434 2435 if (drm_vma_node_has_offset(&obj->base.vma_node)) 2436 return 0; 2437 2438 dev_priv->mm.shrinker_no_lock_stealing = true; 2439 2440 ret = drm_gem_create_mmap_offset(&obj->base); 2441 if (ret != -ENOSPC) 2442 goto out; 2443 2444 /* Badly fragmented mmap space? The only way we can recover 2445 * space is by destroying unwanted objects. We can't randomly release 2446 * mmap_offsets as userspace expects them to be persistent for the 2447 * lifetime of the objects. The closest we can is to release the 2448 * offsets on purgeable objects by truncating it and marking it purged, 2449 * which prevents userspace from ever using that object again. 
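 * Concretely, the code below retries drm_gem_create_mmap_offset() after
 * asking the shrinker for roughly this object's size worth of purgeable
 * pages (bound or unbound), and only shrinks everything as a last
 * resort before the final attempt.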
2450 */ 2451 i915_gem_shrink(dev_priv, 2452 obj->base.size >> PAGE_SHIFT, 2453 I915_SHRINK_BOUND | 2454 I915_SHRINK_UNBOUND | 2455 I915_SHRINK_PURGEABLE); 2456 ret = drm_gem_create_mmap_offset(&obj->base); 2457 if (ret != -ENOSPC) 2458 goto out; 2459 2460 i915_gem_shrink_all(dev_priv); 2461 ret = drm_gem_create_mmap_offset(&obj->base); 2462 out: 2463 dev_priv->mm.shrinker_no_lock_stealing = false; 2464 2465 return ret; 2466 } 2467 2468 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2469 { 2470 drm_gem_free_mmap_offset(&obj->base); 2471 } 2472 2473 int 2474 i915_gem_mmap_gtt(struct drm_file *file, 2475 struct drm_device *dev, 2476 uint32_t handle, 2477 uint64_t *offset) 2478 { 2479 struct drm_gem_object *gobj; 2480 struct drm_i915_gem_object *obj; 2481 int ret; 2482 2483 ret = i915_mutex_lock_interruptible(dev); 2484 if (ret) 2485 return ret; 2486 2487 gobj = drm_gem_object_lookup(dev, file, handle); 2488 if (gobj == NULL) { 2489 ret = -ENOENT; 2490 goto unlock; 2491 } 2492 obj = to_intel_bo(gobj); 2493 2494 if (obj->madv != I915_MADV_WILLNEED) { 2495 DRM_DEBUG("Attempting to mmap a purgeable buffer\n"); 2496 ret = -EFAULT; 2497 goto out; 2498 } 2499 2500 ret = i915_gem_object_create_mmap_offset(obj); 2501 if (ret) 2502 goto out; 2503 2504 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2505 2506 out: 2507 drm_gem_object_unreference(&obj->base); 2508 unlock: 2509 mutex_unlock(&dev->struct_mutex); 2510 return ret; 2511 } 2512 2513 /** 2514 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2515 * @dev: DRM device 2516 * @data: GTT mapping ioctl data 2517 * @file: GEM object info 2518 * 2519 * Simply returns the fake offset to userspace so it can mmap it. 2520 * The mmap call will end up in drm_gem_mmap(), which will set things 2521 * up so we can get faults in the handler above. 2522 * 2523 * The fault handler will take care of binding the object into the GTT 2524 * (since it may have been evicted to make room for something), allocating 2525 * a fence register, and mapping the appropriate aperture address into 2526 * userspace. 2527 */ 2528 int 2529 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2530 struct drm_file *file) 2531 { 2532 struct drm_i915_gem_mmap_gtt *args = data; 2533 2534 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2535 } 2536 2537 /* Immediately discard the backing storage */ 2538 static void 2539 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2540 { 2541 i915_gem_object_free_mmap_offset(obj); 2542 2543 if (obj->base.filp == NULL) 2544 return; 2545 2546 #ifdef __NetBSD__ 2547 { 2548 struct uvm_object *const uobj = obj->base.filp; 2549 2550 if (uobj != NULL) { 2551 /* XXX Calling pgo_put like this is bogus. */ 2552 rw_enter(uobj->vmobjlock, RW_WRITER); 2553 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size, 2554 (PGO_ALLPAGES | PGO_FREE)); 2555 } 2556 } 2557 #else 2558 /* Our goal here is to return as much of the memory as 2559 * is possible back to the system as we are called from OOM. 2560 * To do this we must instruct the shmfs to drop all of its 2561 * backing pages, *now*. 
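 * Once truncated, the object is marked __I915_MADV_PURGED, after which
 * i915_gem_object_get_pages() refuses it with -EFAULT.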
2562 */ 2563 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2564 #endif 2565 obj->madv = __I915_MADV_PURGED; 2566 } 2567 2568 /* Try to discard unwanted pages */ 2569 static void 2570 i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2571 { 2572 #ifdef __NetBSD__ 2573 struct uvm_object *uobj; 2574 #else 2575 struct address_space *mapping; 2576 #endif 2577 2578 switch (obj->madv) { 2579 case I915_MADV_DONTNEED: 2580 i915_gem_object_truncate(obj); 2581 case __I915_MADV_PURGED: 2582 return; 2583 } 2584 2585 if (obj->base.filp == NULL) 2586 return; 2587 2588 #ifdef __NetBSD__ 2589 uobj = obj->base.filp; 2590 rw_enter(uobj->vmobjlock, RW_WRITER); 2591 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size, 2592 PGO_ALLPAGES|PGO_DEACTIVATE|PGO_CLEANIT); 2593 #else 2594 mapping = file_inode(obj->base.filp)->i_mapping, 2595 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2596 #endif 2597 } 2598 2599 static void 2600 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2601 { 2602 #ifdef __NetBSD__ 2603 struct drm_device *const dev = obj->base.dev; 2604 struct vm_page *page; 2605 int ret; 2606 2607 /* XXX Cargo-culted from the Linux code. */ 2608 BUG_ON(obj->madv == __I915_MADV_PURGED); 2609 2610 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2611 if (ret) { 2612 WARN_ON(ret != -EIO); 2613 i915_gem_clflush_object(obj, true); 2614 obj->base.read_domains = obj->base.write_domain = 2615 I915_GEM_DOMAIN_CPU; 2616 } 2617 2618 i915_gem_gtt_finish_object(obj); 2619 2620 if (i915_gem_object_needs_bit17_swizzle(obj)) 2621 i915_gem_object_save_bit_17_swizzle(obj); 2622 2623 if (obj->madv == I915_MADV_DONTNEED) 2624 obj->dirty = 0; 2625 2626 if (obj->dirty) { 2627 rw_enter(obj->base.filp->vmobjlock, RW_WRITER); 2628 TAILQ_FOREACH(page, &obj->pageq, pageq.queue) { 2629 uvm_pagemarkdirty(page, UVM_PAGE_STATUS_DIRTY); 2630 /* XXX mark page accessed */ 2631 } 2632 rw_exit(obj->base.filp->vmobjlock); 2633 } 2634 obj->dirty = 0; 2635 2636 uvm_obj_unwirepages(obj->base.filp, 0, obj->base.size); 2637 bus_dmamap_destroy(dev->dmat, obj->pages); 2638 #else 2639 struct sg_page_iter sg_iter; 2640 int ret; 2641 2642 BUG_ON(obj->madv == __I915_MADV_PURGED); 2643 2644 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2645 if (ret) { 2646 /* In the event of a disaster, abandon all caches and 2647 * hope for the best. 
2648 */ 2649 WARN_ON(ret != -EIO); 2650 i915_gem_clflush_object(obj, true); 2651 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2652 } 2653 2654 i915_gem_gtt_finish_object(obj); 2655 2656 if (i915_gem_object_needs_bit17_swizzle(obj)) 2657 i915_gem_object_save_bit_17_swizzle(obj); 2658 2659 if (obj->madv == I915_MADV_DONTNEED) 2660 obj->dirty = 0; 2661 2662 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 2663 struct page *page = sg_page_iter_page(&sg_iter); 2664 2665 if (obj->dirty) 2666 set_page_dirty(page); 2667 2668 if (obj->madv == I915_MADV_WILLNEED) 2669 mark_page_accessed(page); 2670 2671 page_cache_release(page); 2672 } 2673 obj->dirty = 0; 2674 2675 sg_free_table(obj->pages); 2676 kfree(obj->pages); 2677 #endif 2678 } 2679 2680 int 2681 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2682 { 2683 const struct drm_i915_gem_object_ops *ops = obj->ops; 2684 2685 if (obj->pages == NULL) 2686 return 0; 2687 2688 if (obj->pages_pin_count) 2689 return -EBUSY; 2690 2691 BUG_ON(i915_gem_obj_bound_any(obj)); 2692 2693 /* ->put_pages might need to allocate memory for the bit17 swizzle 2694 * array, hence protect them from being reaped by removing them from gtt 2695 * lists early. */ 2696 list_del(&obj->global_list); 2697 2698 ops->put_pages(obj); 2699 obj->pages = NULL; 2700 2701 i915_gem_object_invalidate(obj); 2702 2703 return 0; 2704 } 2705 2706 static int 2707 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2708 { 2709 #ifdef __NetBSD__ 2710 struct drm_device *const dev = obj->base.dev; 2711 struct drm_i915_private *dev_priv = dev->dev_private; 2712 struct vm_page *page; 2713 int ret; 2714 2715 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2716 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2717 2718 KASSERT(obj->pages == NULL); 2719 TAILQ_INIT(&obj->pageq); 2720 2721 /* XXX errno NetBSD->Linux */ 2722 ret = -bus_dmamap_create(dev->dmat, obj->base.size, 2723 obj->base.size/PAGE_SIZE, PAGE_SIZE, 0, BUS_DMA_NOWAIT, 2724 &obj->pages); 2725 if (ret) 2726 goto fail0; 2727 2728 /* XXX errno NetBSD->Linux */ 2729 ret = -uvm_obj_wirepages(obj->base.filp, 0, obj->base.size, 2730 &obj->pageq); 2731 if (ret) /* XXX Try purge, shrink. */ 2732 goto fail1; 2733 2734 /* 2735 * Check that the paddrs will fit in 40 bits, or 32 bits on i965. 2736 * 2737 * XXX This should be unnecessary: the uao should guarantee 2738 * this constraint after uao_set_pgfl. 2739 * 2740 * XXX This should also be expanded for newer devices. 2741 */ 2742 TAILQ_FOREACH(page, &obj->pageq, pageq.queue) { 2743 const uint64_t mask = 2744 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)? 2745 0xffffffffULL : 0xffffffffffULL); 2746 if (VM_PAGE_TO_PHYS(page) & ~mask) { 2747 DRM_ERROR("GEM physical address exceeds %u bits" 2748 ": %"PRIxMAX"\n", 2749 popcount64(mask), 2750 (uintmax_t)VM_PAGE_TO_PHYS(page)); 2751 ret = -EIO; 2752 goto fail2; 2753 } 2754 } 2755 2756 ret = i915_gem_gtt_prepare_object(obj); 2757 if (ret) 2758 goto fail2; 2759 2760 if (i915_gem_object_needs_bit17_swizzle(obj)) 2761 i915_gem_object_do_bit_17_swizzle(obj); 2762 2763 if (obj->tiling_mode != I915_TILING_NONE && 2764 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2765 i915_gem_object_pin_pages(obj); 2766 2767 /* Success! 
*/ 2768 return 0; 2769 2770 fail3: __unused 2771 i915_gem_gtt_finish_object(obj); 2772 fail2: uvm_obj_unwirepages(obj->base.filp, 0, obj->base.size); 2773 fail1: bus_dmamap_destroy(dev->dmat, obj->pages); 2774 obj->pages = NULL; 2775 fail0: KASSERT(ret); 2776 return ret; 2777 #else 2778 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2779 int page_count, i; 2780 struct address_space *mapping; 2781 struct sg_table *st; 2782 struct scatterlist *sg; 2783 struct sg_page_iter sg_iter; 2784 struct page *page; 2785 unsigned long last_pfn = 0; /* suppress gcc warning */ 2786 int ret; 2787 gfp_t gfp; 2788 2789 /* Assert that the object is not currently in any GPU domain. As it 2790 * wasn't in the GTT, there shouldn't be any way it could have been in 2791 * a GPU cache 2792 */ 2793 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2794 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2795 2796 st = kmalloc(sizeof(*st), GFP_KERNEL); 2797 if (st == NULL) 2798 return -ENOMEM; 2799 2800 page_count = obj->base.size / PAGE_SIZE; 2801 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2802 kfree(st); 2803 return -ENOMEM; 2804 } 2805 2806 /* Get the list of pages out of our struct file. They'll be pinned 2807 * at this point until we release them. 2808 * 2809 * Fail silently without starting the shrinker 2810 */ 2811 mapping = file_inode(obj->base.filp)->i_mapping; 2812 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); 2813 gfp |= __GFP_NORETRY | __GFP_NOWARN; 2814 sg = st->sgl; 2815 st->nents = 0; 2816 for (i = 0; i < page_count; i++) { 2817 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2818 if (IS_ERR(page)) { 2819 i915_gem_shrink(dev_priv, 2820 page_count, 2821 I915_SHRINK_BOUND | 2822 I915_SHRINK_UNBOUND | 2823 I915_SHRINK_PURGEABLE); 2824 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2825 } 2826 if (IS_ERR(page)) { 2827 /* We've tried hard to allocate the memory by reaping 2828 * our own buffer, now let the real VM do its job and 2829 * go down in flames if truly OOM. 2830 */ 2831 i915_gem_shrink_all(dev_priv); 2832 page = shmem_read_mapping_page(mapping, i); 2833 if (IS_ERR(page)) { 2834 ret = PTR_ERR(page); 2835 goto err_pages; 2836 } 2837 } 2838 #ifdef CONFIG_SWIOTLB 2839 if (swiotlb_nr_tbl()) { 2840 st->nents++; 2841 sg_set_page(sg, page, PAGE_SIZE, 0); 2842 sg = sg_next(sg); 2843 continue; 2844 } 2845 #endif 2846 if (!i || page_to_pfn(page) != last_pfn + 1) { 2847 if (i) 2848 sg = sg_next(sg); 2849 st->nents++; 2850 sg_set_page(sg, page, PAGE_SIZE, 0); 2851 } else { 2852 sg->length += PAGE_SIZE; 2853 } 2854 last_pfn = page_to_pfn(page); 2855 2856 /* Check that the i965g/gm workaround works. 
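 * (pfn 0x00100000 with 4 KiB pages is the 4 GiB boundary that the
 * __GFP_DMA32 constraint carried in the mapping's gfp mask is meant to
 * keep these pages under.)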
*/ 2857 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2858 } 2859 #ifdef CONFIG_SWIOTLB 2860 if (!swiotlb_nr_tbl()) 2861 #endif 2862 sg_mark_end(sg); 2863 obj->pages = st; 2864 2865 ret = i915_gem_gtt_prepare_object(obj); 2866 if (ret) 2867 goto err_pages; 2868 2869 if (i915_gem_object_needs_bit17_swizzle(obj)) 2870 i915_gem_object_do_bit_17_swizzle(obj); 2871 2872 if (obj->tiling_mode != I915_TILING_NONE && 2873 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) 2874 i915_gem_object_pin_pages(obj); 2875 2876 return 0; 2877 2878 err_pages: 2879 sg_mark_end(sg); 2880 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) 2881 page_cache_release(sg_page_iter_page(&sg_iter)); 2882 sg_free_table(st); 2883 kfree(st); 2884 2885 /* shmemfs first checks if there is enough memory to allocate the page 2886 * and reports ENOSPC should there be insufficient, along with the usual 2887 * ENOMEM for a genuine allocation failure. 2888 * 2889 * We use ENOSPC in our driver to mean that we have run out of aperture 2890 * space and so want to translate the error from shmemfs back to our 2891 * usual understanding of ENOMEM. 2892 */ 2893 if (ret == -ENOSPC) 2894 ret = -ENOMEM; 2895 2896 return ret; 2897 #endif 2898 } 2899 2900 /* Ensure that the associated pages are gathered from the backing storage 2901 * and pinned into our object. i915_gem_object_get_pages() may be called 2902 * multiple times before they are released by a single call to 2903 * i915_gem_object_put_pages() - once the pages are no longer referenced 2904 * either as a result of memory pressure (reaping pages under the shrinker) 2905 * or as the object is itself released. 2906 */ 2907 int 2908 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2909 { 2910 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2911 const struct drm_i915_gem_object_ops *ops = obj->ops; 2912 int ret; 2913 2914 if (obj->pages) 2915 return 0; 2916 2917 if (obj->madv != I915_MADV_WILLNEED) { 2918 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 2919 return -EFAULT; 2920 } 2921 2922 BUG_ON(obj->pages_pin_count); 2923 2924 ret = ops->get_pages(obj); 2925 if (ret) 2926 return ret; 2927 2928 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); 2929 2930 #ifndef __NetBSD__ 2931 obj->get_page.sg = obj->pages->sgl; 2932 obj->get_page.last = 0; 2933 #endif 2934 2935 return 0; 2936 } 2937 2938 void i915_vma_move_to_active(struct i915_vma *vma, 2939 struct drm_i915_gem_request *req) 2940 { 2941 struct drm_i915_gem_object *obj = vma->obj; 2942 struct intel_engine_cs *ring; 2943 2944 ring = i915_gem_request_get_ring(req); 2945 2946 /* Add a reference if we're newly entering the active list. 
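 * The matching unreference happens in i915_gem_object_retire__read()
 * once the last ring clears its bit in obj->active.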
*/ 2947 if (obj->active == 0) 2948 drm_gem_object_reference(&obj->base); 2949 obj->active |= intel_ring_flag(ring); 2950 2951 list_move_tail(&obj->ring_list[ring->id], &ring->active_list); 2952 i915_gem_request_assign(&obj->last_read_req[ring->id], req); 2953 2954 list_move_tail(&vma->mm_list, &vma->vm->active_list); 2955 } 2956 2957 static void 2958 i915_gem_object_retire__write(struct drm_i915_gem_object *obj) 2959 { 2960 RQ_BUG_ON(obj->last_write_req == NULL); 2961 RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); 2962 2963 i915_gem_request_assign(&obj->last_write_req, NULL); 2964 intel_fb_obj_flush(obj, true, ORIGIN_CS); 2965 } 2966 2967 static void 2968 i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) 2969 { 2970 struct i915_vma *vma; 2971 2972 RQ_BUG_ON(obj->last_read_req[ring] == NULL); 2973 RQ_BUG_ON(!(obj->active & (1 << ring))); 2974 2975 list_del_init(&obj->ring_list[ring]); 2976 i915_gem_request_assign(&obj->last_read_req[ring], NULL); 2977 2978 if (obj->last_write_req && obj->last_write_req->ring->id == ring) 2979 i915_gem_object_retire__write(obj); 2980 2981 obj->active &= ~(1 << ring); 2982 if (obj->active) 2983 return; 2984 2985 /* Bump our place on the bound list to keep it roughly in LRU order 2986 * so that we don't steal from recently used but inactive objects 2987 * (unless we are forced to ofc!) 2988 */ 2989 list_move_tail(&obj->global_list, 2990 &to_i915(obj->base.dev)->mm.bound_list); 2991 2992 list_for_each_entry(vma, &obj->vma_list, vma_link) { 2993 if (!list_empty(&vma->mm_list)) 2994 list_move_tail(&vma->mm_list, &vma->vm->inactive_list); 2995 } 2996 2997 i915_gem_request_assign(&obj->last_fenced_req, NULL); 2998 drm_gem_object_unreference(&obj->base); 2999 } 3000 3001 static int 3002 i915_gem_init_seqno(struct drm_device *dev, u32 seqno) 3003 { 3004 struct drm_i915_private *dev_priv = dev->dev_private; 3005 struct intel_engine_cs *ring; 3006 int ret, i, j; 3007 3008 /* Carefully retire all requests without writing to the rings */ 3009 for_each_ring(ring, dev_priv, i) { 3010 ret = intel_ring_idle(ring); 3011 if (ret) 3012 return ret; 3013 } 3014 i915_gem_retire_requests(dev); 3015 3016 /* Finally reset hw state */ 3017 for_each_ring(ring, dev_priv, i) { 3018 intel_ring_init_seqno(ring, seqno); 3019 3020 for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++) 3021 ring->semaphore.sync_seqno[j] = 0; 3022 } 3023 3024 return 0; 3025 } 3026 3027 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) 3028 { 3029 struct drm_i915_private *dev_priv = dev->dev_private; 3030 int ret; 3031 3032 if (seqno == 0) 3033 return -EINVAL; 3034 3035 /* HWS page needs to be set less than what we 3036 * will inject to ring 3037 */ 3038 ret = i915_gem_init_seqno(dev, seqno - 1); 3039 if (ret) 3040 return ret; 3041 3042 /* Carefully set the last_seqno value so that wrap 3043 * detection still works 3044 */ 3045 dev_priv->next_seqno = seqno; 3046 dev_priv->last_seqno = seqno - 1; 3047 if (dev_priv->last_seqno == 0) 3048 dev_priv->last_seqno--; 3049 3050 return 0; 3051 } 3052 3053 int 3054 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 3055 { 3056 struct drm_i915_private *dev_priv = dev->dev_private; 3057 3058 /* reserve 0 for non-seqno */ 3059 if (dev_priv->next_seqno == 0) { 3060 int ret = i915_gem_init_seqno(dev, 0); 3061 if (ret) 3062 return ret; 3063 3064 dev_priv->next_seqno = 1; 3065 } 3066 3067 *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; 3068 return 0; 3069 } 3070 3071 /* 3072 * NB: This function is not 
allowed to fail. Doing so would mean the the 3073 * request is not being tracked for completion but the work itself is 3074 * going to happen on the hardware. This would be a Bad Thing(tm). 3075 */ 3076 void __i915_add_request(struct drm_i915_gem_request *request, 3077 struct drm_i915_gem_object *obj, 3078 bool flush_caches) 3079 { 3080 struct intel_engine_cs *ring; 3081 struct drm_i915_private *dev_priv; 3082 struct intel_ringbuffer *ringbuf; 3083 u32 request_start; 3084 int ret; 3085 3086 if (WARN_ON(request == NULL)) 3087 return; 3088 3089 ring = request->ring; 3090 dev_priv = ring->dev->dev_private; 3091 ringbuf = request->ringbuf; 3092 3093 /* 3094 * To ensure that this call will not fail, space for its emissions 3095 * should already have been reserved in the ring buffer. Let the ring 3096 * know that it is time to use that space up. 3097 */ 3098 intel_ring_reserved_space_use(ringbuf); 3099 3100 request_start = intel_ring_get_tail(ringbuf); 3101 /* 3102 * Emit any outstanding flushes - execbuf can fail to emit the flush 3103 * after having emitted the batchbuffer command. Hence we need to fix 3104 * things up similar to emitting the lazy request. The difference here 3105 * is that the flush _must_ happen before the next request, no matter 3106 * what. 3107 */ 3108 if (flush_caches) { 3109 if (i915.enable_execlists) 3110 ret = logical_ring_flush_all_caches(request); 3111 else 3112 ret = intel_ring_flush_all_caches(request); 3113 /* Not allowed to fail! */ 3114 WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); 3115 } 3116 3117 /* Record the position of the start of the request so that 3118 * should we detect the updated seqno part-way through the 3119 * GPU processing the request, we never over-estimate the 3120 * position of the head. 3121 */ 3122 request->postfix = intel_ring_get_tail(ringbuf); 3123 3124 if (i915.enable_execlists) 3125 ret = ring->emit_request(request); 3126 else { 3127 ret = ring->add_request(request); 3128 3129 request->tail = intel_ring_get_tail(ringbuf); 3130 } 3131 /* Not allowed to fail! */ 3132 WARN(ret, "emit|add_request failed: %d!\n", ret); 3133 3134 request->head = request_start; 3135 3136 /* Whilst this request exists, batch_obj will be on the 3137 * active_list, and so will hold the active reference. Only when this 3138 * request is retired will the the batch_obj be moved onto the 3139 * inactive_list and lose its active reference. Hence we do not need 3140 * to explicitly hold another reference here. 3141 */ 3142 request->batch_obj = obj; 3143 3144 request->emitted_jiffies = jiffies; 3145 request->previous_seqno = ring->last_submitted_seqno; 3146 ring->last_submitted_seqno = request->seqno; 3147 list_add_tail(&request->list, &ring->request_list); 3148 3149 trace_i915_gem_request_add(request); 3150 3151 i915_queue_hangcheck(ring->dev); 3152 3153 queue_delayed_work(dev_priv->wq, 3154 &dev_priv->mm.retire_work, 3155 round_jiffies_up_relative(HZ)); 3156 intel_mark_busy(dev_priv->dev); 3157 3158 /* Sanity check that the reserved size was large enough. 
*/ 3159 intel_ring_reserved_space_end(ringbuf); 3160 } 3161 3162 static bool i915_context_is_banned(struct drm_i915_private *dev_priv, 3163 const struct intel_context *ctx) 3164 { 3165 unsigned long elapsed; 3166 3167 elapsed = get_seconds() - ctx->hang_stats.guilty_ts; 3168 3169 if (ctx->hang_stats.banned) 3170 return true; 3171 3172 if (ctx->hang_stats.ban_period_seconds && 3173 elapsed <= ctx->hang_stats.ban_period_seconds) { 3174 if (!i915_gem_context_is_default(ctx)) { 3175 DRM_DEBUG("context hanging too fast, banning!\n"); 3176 return true; 3177 } else if (i915_stop_ring_allow_ban(dev_priv)) { 3178 if (i915_stop_ring_allow_warn(dev_priv)) 3179 DRM_ERROR("gpu hanging too fast, banning!\n"); 3180 return true; 3181 } 3182 } 3183 3184 return false; 3185 } 3186 3187 static void i915_set_reset_status(struct drm_i915_private *dev_priv, 3188 struct intel_context *ctx, 3189 const bool guilty) 3190 { 3191 struct i915_ctx_hang_stats *hs; 3192 3193 if (WARN_ON(!ctx)) 3194 return; 3195 3196 hs = &ctx->hang_stats; 3197 3198 if (guilty) { 3199 hs->banned = i915_context_is_banned(dev_priv, ctx); 3200 hs->batch_active++; 3201 hs->guilty_ts = get_seconds(); 3202 } else { 3203 hs->batch_pending++; 3204 } 3205 } 3206 3207 void i915_gem_request_free(struct kref *req_ref) 3208 { 3209 struct drm_i915_gem_request *req = container_of(req_ref, 3210 typeof(*req), ref); 3211 struct intel_context *ctx = req->ctx; 3212 3213 if (req->file_priv) 3214 i915_gem_request_remove_from_client(req); 3215 3216 if (ctx) { 3217 if (i915.enable_execlists) { 3218 if (ctx != req->ring->default_context) 3219 intel_lr_context_unpin(req); 3220 } 3221 3222 i915_gem_context_unreference(ctx); 3223 } 3224 3225 kmem_cache_free(req->i915->requests, req); 3226 } 3227 3228 int i915_gem_request_alloc(struct intel_engine_cs *ring, 3229 struct intel_context *ctx, 3230 struct drm_i915_gem_request **req_out) 3231 { 3232 struct drm_i915_private *dev_priv = to_i915(ring->dev); 3233 struct drm_i915_gem_request *req; 3234 int ret; 3235 3236 if (!req_out) 3237 return -EINVAL; 3238 3239 *req_out = NULL; 3240 3241 req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); 3242 if (req == NULL) 3243 return -ENOMEM; 3244 3245 ret = i915_gem_get_seqno(ring->dev, &req->seqno); 3246 if (ret) 3247 goto err; 3248 3249 kref_init(&req->ref); 3250 req->i915 = dev_priv; 3251 req->ring = ring; 3252 req->ctx = ctx; 3253 i915_gem_context_reference(req->ctx); 3254 3255 if (i915.enable_execlists) 3256 ret = intel_logical_ring_alloc_request_extras(req); 3257 else 3258 ret = intel_ring_alloc_request_extras(req); 3259 if (ret) { 3260 i915_gem_context_unreference(req->ctx); 3261 goto err; 3262 } 3263 3264 /* 3265 * Reserve space in the ring buffer for all the commands required to 3266 * eventually emit this request. This is to guarantee that the 3267 * i915_add_request() call can't fail. Note that the reserve may need 3268 * to be redone if the request is not actually submitted straight 3269 * away, e.g. because a GPU scheduler has deferred it. 3270 */ 3271 if (i915.enable_execlists) 3272 ret = intel_logical_ring_reserve_space(req); 3273 else 3274 ret = intel_ring_reserve_space(req); 3275 if (ret) { 3276 /* 3277 * At this point, the request is fully allocated even if not 3278 * fully prepared. Thus it can be cleaned up using the proper 3279 * free code. 
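 * i915_gem_request_cancel() below undoes the ring-space reservation
 * bookkeeping and drops the reference, freeing the request through the
 * normal request-free path.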
3280 */ 3281 i915_gem_request_cancel(req); 3282 return ret; 3283 } 3284 3285 *req_out = req; 3286 return 0; 3287 3288 err: 3289 kmem_cache_free(dev_priv->requests, req); 3290 return ret; 3291 } 3292 3293 void i915_gem_request_cancel(struct drm_i915_gem_request *req) 3294 { 3295 intel_ring_reserved_space_cancel(req->ringbuf); 3296 3297 i915_gem_request_unreference(req); 3298 } 3299 3300 struct drm_i915_gem_request * 3301 i915_gem_find_active_request(struct intel_engine_cs *ring) 3302 { 3303 struct drm_i915_gem_request *request; 3304 3305 list_for_each_entry(request, &ring->request_list, list) { 3306 if (i915_gem_request_completed(request, false)) 3307 continue; 3308 3309 return request; 3310 } 3311 3312 return NULL; 3313 } 3314 3315 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv, 3316 struct intel_engine_cs *ring) 3317 { 3318 struct drm_i915_gem_request *request; 3319 bool ring_hung; 3320 3321 request = i915_gem_find_active_request(ring); 3322 3323 if (request == NULL) 3324 return; 3325 3326 ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG; 3327 3328 i915_set_reset_status(dev_priv, request->ctx, ring_hung); 3329 3330 list_for_each_entry_continue(request, &ring->request_list, list) 3331 i915_set_reset_status(dev_priv, request->ctx, false); 3332 } 3333 3334 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, 3335 struct intel_engine_cs *ring) 3336 { 3337 while (!list_empty(&ring->active_list)) { 3338 struct drm_i915_gem_object *obj; 3339 3340 obj = list_first_entry(&ring->active_list, 3341 struct drm_i915_gem_object, 3342 ring_list[ring->id]); 3343 3344 i915_gem_object_retire__read(obj, ring->id); 3345 } 3346 3347 /* 3348 * Clear the execlists queue up before freeing the requests, as those 3349 * are the ones that keep the context and ringbuffer backing objects 3350 * pinned in place. 3351 */ 3352 while (!list_empty(&ring->execlist_queue)) { 3353 struct drm_i915_gem_request *submit_req; 3354 3355 submit_req = list_first_entry(&ring->execlist_queue, 3356 struct drm_i915_gem_request, 3357 execlist_link); 3358 list_del(&submit_req->execlist_link); 3359 3360 if (submit_req->ctx != ring->default_context) 3361 intel_lr_context_unpin(submit_req); 3362 3363 i915_gem_request_unreference(submit_req); 3364 } 3365 3366 /* 3367 * We must free the requests after all the corresponding objects have 3368 * been moved off active lists. Which is the same order as the normal 3369 * retire_requests function does. This is important if object hold 3370 * implicit references on things like e.g. ppgtt address spaces through 3371 * the request. 3372 */ 3373 while (!list_empty(&ring->request_list)) { 3374 struct drm_i915_gem_request *request; 3375 3376 request = list_first_entry(&ring->request_list, 3377 struct drm_i915_gem_request, 3378 list); 3379 3380 i915_gem_request_retire(request); 3381 } 3382 } 3383 3384 void i915_gem_reset(struct drm_device *dev) 3385 { 3386 struct drm_i915_private *dev_priv = dev->dev_private; 3387 struct intel_engine_cs *ring; 3388 int i; 3389 3390 /* 3391 * Before we free the objects from the requests, we need to inspect 3392 * them for finding the guilty party. As the requests only borrow 3393 * their reference to the objects, the inspection must be done first. 
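 * Hence the two passes below: i915_gem_reset_ring_status() assigns
 * blame while the requests are still intact, and only then does
 * i915_gem_reset_ring_cleanup() retire the requests and their objects.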
3394 */ 3395 for_each_ring(ring, dev_priv, i) 3396 i915_gem_reset_ring_status(dev_priv, ring); 3397 3398 for_each_ring(ring, dev_priv, i) 3399 i915_gem_reset_ring_cleanup(dev_priv, ring); 3400 3401 i915_gem_context_reset(dev); 3402 3403 i915_gem_restore_fences(dev); 3404 3405 WARN_ON(i915_verify_lists(dev)); 3406 } 3407 3408 /** 3409 * This function clears the request list as sequence numbers are passed. 3410 */ 3411 void 3412 i915_gem_retire_requests_ring(struct intel_engine_cs *ring) 3413 { 3414 WARN_ON(i915_verify_lists(ring->dev)); 3415 3416 /* Retire requests first as we use it above for the early return. 3417 * If we retire requests last, we may use a later seqno and so clear 3418 * the requests lists without clearing the active list, leading to 3419 * confusion. 3420 */ 3421 while (!list_empty(&ring->request_list)) { 3422 struct drm_i915_gem_request *request; 3423 3424 request = list_first_entry(&ring->request_list, 3425 struct drm_i915_gem_request, 3426 list); 3427 3428 if (!i915_gem_request_completed(request, true)) 3429 break; 3430 3431 i915_gem_request_retire(request); 3432 } 3433 3434 /* Move any buffers on the active list that are no longer referenced 3435 * by the ringbuffer to the flushing/inactive lists as appropriate, 3436 * before we free the context associated with the requests. 3437 */ 3438 while (!list_empty(&ring->active_list)) { 3439 struct drm_i915_gem_object *obj; 3440 3441 obj = list_first_entry(&ring->active_list, 3442 struct drm_i915_gem_object, 3443 ring_list[ring->id]); 3444 3445 if (!list_empty(&obj->last_read_req[ring->id]->list)) 3446 break; 3447 3448 i915_gem_object_retire__read(obj, ring->id); 3449 } 3450 3451 if (unlikely(ring->trace_irq_req && 3452 i915_gem_request_completed(ring->trace_irq_req, true))) { 3453 ring->irq_put(ring); 3454 i915_gem_request_assign(&ring->trace_irq_req, NULL); 3455 } 3456 3457 WARN_ON(i915_verify_lists(ring->dev)); 3458 } 3459 3460 bool 3461 i915_gem_retire_requests(struct drm_device *dev) 3462 { 3463 struct drm_i915_private *dev_priv = dev->dev_private; 3464 struct intel_engine_cs *ring; 3465 bool idle = true; 3466 int i; 3467 3468 for_each_ring(ring, dev_priv, i) { 3469 i915_gem_retire_requests_ring(ring); 3470 idle &= list_empty(&ring->request_list); 3471 if (i915.enable_execlists) { 3472 unsigned long flags; 3473 3474 spin_lock_irqsave(&ring->execlist_lock, flags); 3475 idle &= list_empty(&ring->execlist_queue); 3476 spin_unlock_irqrestore(&ring->execlist_lock, flags); 3477 3478 intel_execlists_retire_requests(ring); 3479 } 3480 } 3481 3482 if (idle) 3483 mod_delayed_work(dev_priv->wq, 3484 &dev_priv->mm.idle_work, 3485 msecs_to_jiffies(100)); 3486 3487 return idle; 3488 } 3489 3490 static void 3491 i915_gem_retire_work_handler(struct work_struct *work) 3492 { 3493 struct drm_i915_private *dev_priv = 3494 container_of(work, typeof(*dev_priv), mm.retire_work.work); 3495 struct drm_device *dev = dev_priv->dev; 3496 bool idle; 3497 3498 /* Come back later if the device is busy... 
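 * i.e. if struct_mutex is contended or requests are still outstanding,
 * requeue ourselves with roughly a one-second delay.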
*/ 3499 idle = false; 3500 if (mutex_trylock(&dev->struct_mutex)) { 3501 idle = i915_gem_retire_requests(dev); 3502 mutex_unlock(&dev->struct_mutex); 3503 } 3504 if (!idle) 3505 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 3506 round_jiffies_up_relative(HZ)); 3507 } 3508 3509 static void 3510 i915_gem_idle_work_handler(struct work_struct *work) 3511 { 3512 struct drm_i915_private *dev_priv = 3513 container_of(work, typeof(*dev_priv), mm.idle_work.work); 3514 struct drm_device *dev = dev_priv->dev; 3515 struct intel_engine_cs *ring; 3516 int i; 3517 3518 for_each_ring(ring, dev_priv, i) 3519 if (!list_empty(&ring->request_list)) 3520 return; 3521 3522 intel_mark_idle(dev); 3523 3524 if (mutex_trylock(&dev->struct_mutex)) { 3525 struct intel_engine_cs *ring; 3526 int i; 3527 3528 for_each_ring(ring, dev_priv, i) 3529 i915_gem_batch_pool_fini(&ring->batch_pool); 3530 3531 mutex_unlock(&dev->struct_mutex); 3532 } 3533 } 3534 3535 /** 3536 * Ensures that an object will eventually get non-busy by flushing any required 3537 * write domains, emitting any outstanding lazy request and retiring any 3538 * completed requests. 3539 */ 3540 static int 3541 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 3542 { 3543 int i; 3544 3545 if (!obj->active) 3546 return 0; 3547 3548 for (i = 0; i < I915_NUM_RINGS; i++) { 3549 struct drm_i915_gem_request *req; 3550 3551 req = obj->last_read_req[i]; 3552 if (req == NULL) 3553 continue; 3554 3555 if (list_empty(&req->list)) 3556 goto retire; 3557 3558 if (i915_gem_request_completed(req, true)) { 3559 __i915_gem_request_retire__upto(req); 3560 retire: 3561 i915_gem_object_retire__read(obj, i); 3562 } 3563 } 3564 3565 return 0; 3566 } 3567 3568 /** 3569 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 3570 * @DRM_IOCTL_ARGS: standard ioctl arguments 3571 * 3572 * Returns 0 if successful, else an error is returned with the remaining time in 3573 * the timeout parameter. 3574 * -ETIME: object is still busy after timeout 3575 * -ERESTARTSYS: signal interrupted the wait 3576 * -ENOENT: object doesn't exist 3577 * Also possible, but rare: 3578 * -EAGAIN: GPU wedged 3579 * -ENOMEM: damn 3580 * -ENODEV: Internal IRQ fail 3581 * -E?: The add request failed 3582 * 3583 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 3584 * non-zero timeout parameter the wait ioctl will wait for the given number of 3585 * nanoseconds on an object becoming unbusy. Since the wait itself does so 3586 * without holding struct_mutex the object may become re-busied before this 3587 * function completes. A similar but shorter race condition exists in the busy 3588 * ioctl. 3589 */ 3590 int 3591 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 3592 { 3593 struct drm_i915_private *dev_priv = dev->dev_private; 3594 struct drm_i915_gem_wait *args = data; 3595 struct drm_gem_object *gobj; 3596 struct drm_i915_gem_object *obj; 3597 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3598 unsigned reset_counter; 3599 int i, n = 0; 3600 int ret; 3601 3602 if (args->flags != 0) 3603 return -EINVAL; 3604 3605 ret = i915_mutex_lock_interruptible(dev); 3606 if (ret) 3607 return ret; 3608 3609 gobj = drm_gem_object_lookup(dev, file, args->bo_handle); 3610 if (gobj == NULL) { 3611 mutex_unlock(&dev->struct_mutex); 3612 return -ENOENT; 3613 } 3614 obj = to_intel_bo(gobj); 3615 3616 /* Need to make sure the object gets inactive eventually.
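 * i915_gem_object_flush_active() retires any read requests that have
 * already completed, so an idle object is not reported as busy merely
 * because retirement is lagging behind.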
*/ 3617 ret = i915_gem_object_flush_active(obj); 3618 if (ret) 3619 goto out; 3620 3621 if (!obj->active) 3622 goto out; 3623 3624 /* Do this after OLR check to make sure we make forward progress polling 3625 * on this IOCTL with a timeout == 0 (like busy ioctl) 3626 */ 3627 if (args->timeout_ns == 0) { 3628 ret = -ETIME; 3629 goto out; 3630 } 3631 3632 drm_gem_object_unreference(&obj->base); 3633 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 3634 3635 for (i = 0; i < I915_NUM_RINGS; i++) { 3636 if (obj->last_read_req[i] == NULL) 3637 continue; 3638 3639 req[n++] = i915_gem_request_reference(obj->last_read_req[i]); 3640 } 3641 3642 mutex_unlock(&dev->struct_mutex); 3643 3644 for (i = 0; i < n; i++) { 3645 if (ret == 0) 3646 ret = __i915_wait_request(req[i], reset_counter, true, 3647 args->timeout_ns > 0 ? &args->timeout_ns : NULL, 3648 file->driver_priv); 3649 i915_gem_request_unreference__unlocked(req[i]); 3650 } 3651 return ret; 3652 3653 out: 3654 drm_gem_object_unreference(&obj->base); 3655 mutex_unlock(&dev->struct_mutex); 3656 return ret; 3657 } 3658 3659 static int 3660 __i915_gem_object_sync(struct drm_i915_gem_object *obj, 3661 struct intel_engine_cs *to, 3662 struct drm_i915_gem_request *from_req, 3663 struct drm_i915_gem_request **to_req) 3664 { 3665 struct intel_engine_cs *from; 3666 int ret; 3667 3668 from = i915_gem_request_get_ring(from_req); 3669 if (to == from) 3670 return 0; 3671 3672 if (i915_gem_request_completed(from_req, true)) 3673 return 0; 3674 3675 if (!i915_semaphore_is_enabled(obj->base.dev)) { 3676 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3677 ret = __i915_wait_request(from_req, 3678 atomic_read(&i915->gpu_error.reset_counter), 3679 i915->mm.interruptible, 3680 NULL, 3681 &i915->rps.semaphores); 3682 if (ret) 3683 return ret; 3684 3685 i915_gem_object_retire_request(obj, from_req); 3686 } else { 3687 int idx = intel_ring_sync_index(from, to); 3688 u32 seqno = i915_gem_request_get_seqno(from_req); 3689 3690 WARN_ON(!to_req); 3691 3692 if (seqno <= from->semaphore.sync_seqno[idx]) 3693 return 0; 3694 3695 if (*to_req == NULL) { 3696 ret = i915_gem_request_alloc(to, to->default_context, to_req); 3697 if (ret) 3698 return ret; 3699 } 3700 3701 trace_i915_gem_ring_sync_to(*to_req, from, from_req); 3702 ret = to->semaphore.sync_to(*to_req, from, seqno); 3703 if (ret) 3704 return ret; 3705 3706 /* We use last_read_req because sync_to() 3707 * might have just caused seqno wrap under 3708 * the radar. 3709 */ 3710 from->semaphore.sync_seqno[idx] = 3711 i915_gem_request_get_seqno(obj->last_read_req[from->id]); 3712 } 3713 3714 return 0; 3715 } 3716 3717 /** 3718 * i915_gem_object_sync - sync an object to a ring. 3719 * 3720 * @obj: object which may be in use on another ring. 3721 * @to: ring we wish to use the object on. May be NULL. 3722 * @to_req: request we wish to use the object for. See below. 3723 * This will be allocated and returned if a request is 3724 * required but not passed in. 3725 * 3726 * This code is meant to abstract object synchronization with the GPU. 3727 * Calling with NULL implies synchronizing the object with the CPU 3728 * rather than a particular GPU ring. Conceptually we serialise writes 3729 * between engines inside the GPU. We only allow one engine to write 3730 * into a buffer at any time, but multiple readers. 
To ensure each has 3731 * a coherent view of memory, we must: 3732 * 3733 * - If there is an outstanding write request to the object, the new 3734 * request must wait for it to complete (either CPU or in hw, requests 3735 * on the same ring will be naturally ordered). 3736 * 3737 * - If we are a write request (pending_write_domain is set), the new 3738 * request must wait for outstanding read requests to complete. 3739 * 3740 * For CPU synchronisation (NULL to) no request is required. For syncing with 3741 * rings to_req must be non-NULL. However, a request does not have to be 3742 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a 3743 * request will be allocated automatically and returned through *to_req. Note 3744 * that it is not guaranteed that commands will be emitted (because the system 3745 * might already be idle). Hence there is no need to create a request that 3746 * might never have any work submitted. Note further that if a request is 3747 * returned in *to_req, it is the responsibility of the caller to submit 3748 * that request (after potentially adding more work to it). 3749 * 3750 * Returns 0 if successful, else propagates up the lower layer error. 3751 */ 3752 int 3753 i915_gem_object_sync(struct drm_i915_gem_object *obj, 3754 struct intel_engine_cs *to, 3755 struct drm_i915_gem_request **to_req) 3756 { 3757 const bool readonly = obj->base.pending_write_domain == 0; 3758 struct drm_i915_gem_request *req[I915_NUM_RINGS]; 3759 int ret, i, n; 3760 3761 if (!obj->active) 3762 return 0; 3763 3764 if (to == NULL) 3765 return i915_gem_object_wait_rendering(obj, readonly); 3766 3767 n = 0; 3768 if (readonly) { 3769 if (obj->last_write_req) 3770 req[n++] = obj->last_write_req; 3771 } else { 3772 for (i = 0; i < I915_NUM_RINGS; i++) 3773 if (obj->last_read_req[i]) 3774 req[n++] = obj->last_read_req[i]; 3775 } 3776 for (i = 0; i < n; i++) { 3777 ret = __i915_gem_object_sync(obj, to, req[i], to_req); 3778 if (ret) 3779 return ret; 3780 } 3781 3782 return 0; 3783 } 3784 3785 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 3786 { 3787 u32 old_write_domain, old_read_domains; 3788 3789 /* Force a pagefault for domain tracking on next user access */ 3790 i915_gem_release_mmap(obj); 3791 3792 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 3793 return; 3794 3795 /* Wait for any direct GTT access to complete */ 3796 mb(); 3797 3798 old_read_domains = obj->base.read_domains; 3799 old_write_domain = obj->base.write_domain; 3800 3801 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 3802 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 3803 3804 trace_i915_gem_object_change_domain(obj, 3805 old_read_domains, 3806 old_write_domain); 3807 } 3808 3809 static int __i915_vma_unbind(struct i915_vma *vma, bool wait) 3810 { 3811 struct drm_i915_gem_object *obj = vma->obj; 3812 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3813 int ret; 3814 3815 if (list_empty(&vma->vma_link)) 3816 return 0; 3817 3818 if (!drm_mm_node_allocated(&vma->node)) { 3819 i915_gem_vma_destroy(vma); 3820 return 0; 3821 } 3822 3823 if (vma->pin_count) 3824 return -EBUSY; 3825 3826 BUG_ON(obj->pages == NULL); 3827 3828 if (wait) { 3829 ret = i915_gem_object_wait_rendering(obj, false); 3830 if (ret) 3831 return ret; 3832 } 3833 3834 if (i915_is_ggtt(vma->vm) && 3835 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3836 i915_gem_object_finish_gtt(obj); 3837 3838 /* release the fence reg _after_ flushing */ 3839 ret = i915_gem_object_put_fence(obj); 3840 if 
(ret) 3841 return ret; 3842 } 3843 3844 trace_i915_vma_unbind(vma); 3845 3846 vma->vm->unbind_vma(vma); 3847 vma->bound = 0; 3848 3849 list_del_init(&vma->mm_list); 3850 if (i915_is_ggtt(vma->vm)) { 3851 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 3852 obj->map_and_fenceable = false; 3853 } else if (vma->ggtt_view.pages) { 3854 #ifdef __NetBSD__ 3855 panic("rotated/partial views can't happen"); 3856 #else 3857 sg_free_table(vma->ggtt_view.pages); 3858 kfree(vma->ggtt_view.pages); 3859 #endif 3860 } 3861 vma->ggtt_view.pages = NULL; 3862 } 3863 3864 drm_mm_remove_node(&vma->node); 3865 i915_gem_vma_destroy(vma); 3866 3867 /* Since the unbound list is global, only move to that list if 3868 * no more VMAs exist. */ 3869 if (list_empty(&obj->vma_list)) 3870 list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); 3871 3872 /* And finally now the object is completely decoupled from this vma, 3873 * we can drop its hold on the backing storage and allow it to be 3874 * reaped by the shrinker. 3875 */ 3876 i915_gem_object_unpin_pages(obj); 3877 3878 return 0; 3879 } 3880 3881 int i915_vma_unbind(struct i915_vma *vma) 3882 { 3883 return __i915_vma_unbind(vma, true); 3884 } 3885 3886 int __i915_vma_unbind_no_wait(struct i915_vma *vma) 3887 { 3888 return __i915_vma_unbind(vma, false); 3889 } 3890 3891 int i915_gpu_idle(struct drm_device *dev) 3892 { 3893 struct drm_i915_private *dev_priv = dev->dev_private; 3894 struct intel_engine_cs *ring; 3895 int ret, i; 3896 3897 /* Flush everything onto the inactive list. */ 3898 for_each_ring(ring, dev_priv, i) { 3899 if (!i915.enable_execlists) { 3900 struct drm_i915_gem_request *req; 3901 3902 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 3903 if (ret) 3904 return ret; 3905 3906 ret = i915_switch_context(req); 3907 if (ret) { 3908 i915_gem_request_cancel(req); 3909 return ret; 3910 } 3911 3912 i915_add_request_no_flush(req); 3913 } 3914 3915 ret = intel_ring_idle(ring); 3916 if (ret) 3917 return ret; 3918 } 3919 3920 WARN_ON(i915_verify_lists(dev)); 3921 return 0; 3922 } 3923 3924 static bool i915_gem_valid_gtt_space(struct i915_vma *vma, 3925 unsigned long cache_level) 3926 { 3927 struct drm_mm_node *gtt_space = &vma->node; 3928 struct drm_mm_node *other; 3929 3930 /* 3931 * On some machines we have to be careful when putting differing types 3932 * of snoopable memory together to avoid the prefetcher crossing memory 3933 * domains and dying. During vm initialisation, we decide whether or not 3934 * these constraints apply and set the drm_mm.color_adjust 3935 * appropriately. 3936 */ 3937 if (vma->vm->mm.color_adjust == NULL) 3938 return true; 3939 3940 if (!drm_mm_node_allocated(gtt_space)) 3941 return true; 3942 3943 if (list_empty(&gtt_space->node_list)) 3944 return true; 3945 3946 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3947 if (other->allocated && !other->hole_follows && other->color != cache_level) 3948 return false; 3949 3950 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3951 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3952 return false; 3953 3954 return true; 3955 } 3956 3957 /** 3958 * Finds free space in the GTT aperture and binds the object or a view of it 3959 * there.
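 * The search honours any PIN_MAPPABLE, PIN_ZONE_4G and PIN_OFFSET_BIAS
 * limits and, when drm_mm has no suitable hole, evicts inactive objects
 * via i915_gem_evict_something() and retries.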
3960 */ 3961 static struct i915_vma * 3962 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3963 struct i915_address_space *vm, 3964 const struct i915_ggtt_view *ggtt_view, 3965 unsigned alignment, 3966 uint64_t flags) 3967 { 3968 struct drm_device *dev = obj->base.dev; 3969 struct drm_i915_private *dev_priv = dev->dev_private; 3970 u32 fence_alignment, unfenced_alignment; 3971 u32 search_flag, alloc_flag; 3972 u64 start, end; 3973 u64 size, fence_size; 3974 struct i915_vma *vma; 3975 int ret; 3976 3977 if (i915_is_ggtt(vm)) { 3978 u32 view_size; 3979 3980 if (WARN_ON(!ggtt_view)) 3981 return ERR_PTR(-EINVAL); 3982 3983 view_size = i915_ggtt_view_size(obj, ggtt_view); 3984 3985 fence_size = i915_gem_get_gtt_size(dev, 3986 view_size, 3987 obj->tiling_mode); 3988 fence_alignment = i915_gem_get_gtt_alignment(dev, 3989 view_size, 3990 obj->tiling_mode, 3991 true); 3992 unfenced_alignment = i915_gem_get_gtt_alignment(dev, 3993 view_size, 3994 obj->tiling_mode, 3995 false); 3996 size = flags & PIN_MAPPABLE ? fence_size : view_size; 3997 } else { 3998 fence_size = i915_gem_get_gtt_size(dev, 3999 obj->base.size, 4000 obj->tiling_mode); 4001 fence_alignment = i915_gem_get_gtt_alignment(dev, 4002 obj->base.size, 4003 obj->tiling_mode, 4004 true); 4005 unfenced_alignment = 4006 i915_gem_get_gtt_alignment(dev, 4007 obj->base.size, 4008 obj->tiling_mode, 4009 false); 4010 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; 4011 } 4012 4013 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; 4014 end = vm->total; 4015 if (flags & PIN_MAPPABLE) 4016 end = min_t(u64, end, dev_priv->gtt.mappable_end); 4017 if (flags & PIN_ZONE_4G) 4018 end = min_t(u64, end, (1ULL << 32)); 4019 4020 if (alignment == 0) 4021 alignment = flags & PIN_MAPPABLE ? fence_alignment : 4022 unfenced_alignment; 4023 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) { 4024 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n", 4025 ggtt_view ? ggtt_view->type : 0, 4026 alignment); 4027 return ERR_PTR(-EINVAL); 4028 } 4029 4030 /* If binding the object/GGTT view requires more space than the entire 4031 * aperture has, reject it early before evicting everything in a vain 4032 * attempt to find space. 4033 */ 4034 if (size > end) { 4035 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%"PRIx64" > %s aperture=%"PRIx64"\n", 4036 ggtt_view ? ggtt_view->type : 0, 4037 size, 4038 flags & PIN_MAPPABLE ? "mappable" : "total", 4039 end); 4040 return ERR_PTR(-E2BIG); 4041 } 4042 4043 ret = i915_gem_object_get_pages(obj); 4044 if (ret) 4045 return ERR_PTR(ret); 4046 4047 i915_gem_object_pin_pages(obj); 4048 4049 vma = ggtt_view ? 
i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) : 4050 i915_gem_obj_lookup_or_create_vma(obj, vm); 4051 4052 if (IS_ERR(vma)) 4053 goto err_unpin; 4054 4055 if (flags & PIN_HIGH) { 4056 search_flag = DRM_MM_SEARCH_BELOW; 4057 alloc_flag = DRM_MM_CREATE_TOP; 4058 } else { 4059 search_flag = DRM_MM_SEARCH_DEFAULT; 4060 alloc_flag = DRM_MM_CREATE_DEFAULT; 4061 } 4062 4063 search_free: 4064 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 4065 size, alignment, 4066 obj->cache_level, 4067 start, end, 4068 search_flag, 4069 alloc_flag); 4070 if (ret) { 4071 ret = i915_gem_evict_something(dev, vm, size, alignment, 4072 obj->cache_level, 4073 start, end, 4074 flags); 4075 if (ret == 0) 4076 goto search_free; 4077 4078 goto err_free_vma; 4079 } 4080 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 4081 ret = -EINVAL; 4082 goto err_remove_node; 4083 } 4084 4085 trace_i915_vma_bind(vma, flags); 4086 ret = i915_vma_bind(vma, obj->cache_level, flags); 4087 if (ret) 4088 goto err_remove_node; 4089 4090 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); 4091 list_add_tail(&vma->mm_list, &vm->inactive_list); 4092 4093 return vma; 4094 4095 err_remove_node: 4096 drm_mm_remove_node(&vma->node); 4097 err_free_vma: 4098 i915_gem_vma_destroy(vma); 4099 vma = ERR_PTR(ret); 4100 err_unpin: 4101 i915_gem_object_unpin_pages(obj); 4102 return vma; 4103 } 4104 4105 bool 4106 i915_gem_clflush_object(struct drm_i915_gem_object *obj, 4107 bool force) 4108 { 4109 /* If we don't have a page list set up, then we're not pinned 4110 * to GPU, and we can ignore the cache flush because it'll happen 4111 * again at bind time. 4112 */ 4113 if (obj->pages == NULL) 4114 return false; 4115 4116 /* 4117 * Stolen memory is always coherent with the GPU as it is explicitly 4118 * marked as wc by the system, or the system is cache-coherent. 4119 */ 4120 if (obj->stolen || obj->phys_handle) 4121 return false; 4122 4123 /* If the GPU is snooping the contents of the CPU cache, 4124 * we do not need to manually clear the CPU cache lines. However, 4125 * the caches are only snooped when the render cache is 4126 * flushed/invalidated. As we always have to emit invalidations 4127 * and flushes when moving into and out of the RENDER domain, correct 4128 * snooping behaviour occurs naturally as the result of our domain 4129 * tracking. 4130 */ 4131 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { 4132 obj->cache_dirty = true; 4133 return false; 4134 } 4135 4136 trace_i915_gem_object_clflush(obj); 4137 #ifdef __NetBSD__ 4138 drm_clflush_pglist(&obj->pageq); 4139 #else 4140 drm_clflush_sg(obj->pages); 4141 #endif 4142 obj->cache_dirty = false; 4143 4144 return true; 4145 } 4146 4147 /** Flushes the GTT write domain for the object if it's dirty. */ 4148 static void 4149 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 4150 { 4151 uint32_t old_write_domain; 4152 4153 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 4154 return; 4155 4156 /* No actual flushing is required for the GTT write domain. Writes 4157 * to it immediately go to main memory as far as we know, so there's 4158 * no chipset flush. It also doesn't land in render cache. 4159 * 4160 * However, we do have to enforce the order so that all writes through 4161 * the GTT land before any writes to the device, such as updates to 4162 * the GATT itself. 
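 * That ordering is what the wmb() below provides, before the write domain
 * is cleared and the frontbuffer flush is signalled.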
4163 */ 4164 wmb(); 4165 4166 old_write_domain = obj->base.write_domain; 4167 obj->base.write_domain = 0; 4168 4169 intel_fb_obj_flush(obj, false, ORIGIN_GTT); 4170 4171 trace_i915_gem_object_change_domain(obj, 4172 obj->base.read_domains, 4173 old_write_domain); 4174 } 4175 4176 /** Flushes the CPU write domain for the object if it's dirty. */ 4177 static void 4178 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 4179 { 4180 uint32_t old_write_domain; 4181 4182 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 4183 return; 4184 4185 if (i915_gem_clflush_object(obj, obj->pin_display)) 4186 i915_gem_chipset_flush(obj->base.dev); 4187 4188 old_write_domain = obj->base.write_domain; 4189 obj->base.write_domain = 0; 4190 4191 intel_fb_obj_flush(obj, false, ORIGIN_CPU); 4192 4193 trace_i915_gem_object_change_domain(obj, 4194 obj->base.read_domains, 4195 old_write_domain); 4196 } 4197 4198 /** 4199 * Moves a single object to the GTT read, and possibly write domain. 4200 * 4201 * This function returns when the move is complete, including waiting on 4202 * flushes to occur. 4203 */ 4204 int 4205 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4206 { 4207 uint32_t old_write_domain, old_read_domains; 4208 struct i915_vma *vma; 4209 int ret; 4210 4211 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 4212 return 0; 4213 4214 ret = i915_gem_object_wait_rendering(obj, !write); 4215 if (ret) 4216 return ret; 4217 4218 /* Flush and acquire obj->pages so that we are coherent through 4219 * direct access in memory with previous cached writes through 4220 * shmemfs and that our cache domain tracking remains valid. 4221 * For example, if the obj->filp was moved to swap without us 4222 * being notified and releasing the pages, we would mistakenly 4223 * continue to assume that the obj remained out of the CPU cached 4224 * domain. 4225 */ 4226 ret = i915_gem_object_get_pages(obj); 4227 if (ret) 4228 return ret; 4229 4230 i915_gem_object_flush_cpu_write_domain(obj); 4231 4232 /* Serialise direct access to this object with the barriers for 4233 * coherent writes from the GPU, by effectively invalidating the 4234 * GTT domain upon first access. 4235 */ 4236 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 4237 mb(); 4238 4239 old_write_domain = obj->base.write_domain; 4240 old_read_domains = obj->base.read_domains; 4241 4242 /* It should now be out of any other write domains, and we can update 4243 * the domain values for our changes. 4244 */ 4245 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4246 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4247 if (write) { 4248 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 4249 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 4250 obj->dirty = 1; 4251 } 4252 4253 trace_i915_gem_object_change_domain(obj, 4254 old_read_domains, 4255 old_write_domain); 4256 4257 /* And bump the LRU for this access */ 4258 vma = i915_gem_obj_to_ggtt(obj); 4259 if (vma && drm_mm_node_allocated(&vma->node) && !obj->active) 4260 list_move_tail(&vma->mm_list, 4261 &to_i915(obj->base.dev)->gtt.base.inactive_list); 4262 4263 return 0; 4264 } 4265 4266 /** 4267 * Changes the cache-level of an object across all VMA. 4268 * 4269 * After this function returns, the object will be in the new cache-level 4270 * across all GTT and the contents of the backing storage will be coherent, 4271 * with respect to the new cache-level. 
In order to keep the backing storage 4272 * coherent for all users, we only allow a single cache level to be set 4273 * globally on the object and prevent it from being changed whilst the 4274 * hardware is reading from the object. That is if the object is currently 4275 * on the scanout it will be set to uncached (or equivalent display 4276 * cache coherency) and all non-MOCS GPU access will also be uncached so 4277 * that all direct access to the scanout remains coherent. 4278 */ 4279 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4280 enum i915_cache_level cache_level) 4281 { 4282 struct drm_device *dev = obj->base.dev; 4283 struct i915_vma *vma, *next; 4284 bool bound = false; 4285 int ret = 0; 4286 4287 if (obj->cache_level == cache_level) 4288 goto out; 4289 4290 /* Inspect the list of currently bound VMA and unbind any that would 4291 * be invalid given the new cache-level. This is principally to 4292 * catch the issue of the CS prefetch crossing page boundaries and 4293 * reading an invalid PTE on older architectures. 4294 */ 4295 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 4296 if (!drm_mm_node_allocated(&vma->node)) 4297 continue; 4298 4299 if (vma->pin_count) { 4300 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4301 return -EBUSY; 4302 } 4303 4304 if (!i915_gem_valid_gtt_space(vma, cache_level)) { 4305 ret = i915_vma_unbind(vma); 4306 if (ret) 4307 return ret; 4308 } else 4309 bound = true; 4310 } 4311 4312 /* We can reuse the existing drm_mm nodes but need to change the 4313 * cache-level on the PTE. We could simply unbind them all and 4314 * rebind with the correct cache-level on next use. However since 4315 * we already have a valid slot, dma mapping, pages etc, we may as 4316 * well rewrite the PTE in the belief that doing so tramples upon less 4317 * state and so involves less work. 4318 */ 4319 if (bound) { 4320 /* Before we change the PTE, the GPU must not be accessing it. 4321 * If we wait upon the object, we know that all the bound 4322 * VMA are no longer active. 4323 */ 4324 ret = i915_gem_object_wait_rendering(obj, false); 4325 if (ret) 4326 return ret; 4327 4328 if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) { 4329 /* Access to snoopable pages through the GTT is 4330 * incoherent and on some machines causes a hard 4331 * lockup. Relinquish the CPU mmapping to force 4332 * userspace to refault in the pages and we can 4333 * then double check if the GTT mapping is still 4334 * valid for that pointer access. 4335 */ 4336 i915_gem_release_mmap(obj); 4337 4338 /* As we no longer need a fence for GTT access, 4339 * we can relinquish it now (and so prevent having 4340 * to steal a fence from someone else on the next 4341 * fence request). Note GPU activity would have 4342 * dropped the fence as all snoopable access is 4343 * supposed to be linear. 4344 */ 4345 ret = i915_gem_object_put_fence(obj); 4346 if (ret) 4347 return ret; 4348 } else { 4349 /* We either have incoherent backing store and 4350 * so no GTT access or the architecture is fully 4351 * coherent. In such cases, existing GTT mmaps 4352 * ignore the cache bit in the PTE and we can 4353 * rewrite it without confusing the GPU or having 4354 * to force userspace to fault back in its mmaps.
4355 */ 4356 } 4357 4358 list_for_each_entry(vma, &obj->vma_list, vma_link) { 4359 if (!drm_mm_node_allocated(&vma->node)) 4360 continue; 4361 4362 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4363 if (ret) 4364 return ret; 4365 } 4366 } 4367 4368 list_for_each_entry(vma, &obj->vma_list, vma_link) 4369 vma->node.color = cache_level; 4370 obj->cache_level = cache_level; 4371 4372 out: 4373 /* Flush the dirty CPU caches to the backing storage so that the 4374 * object is now coherent at its new cache level (with respect 4375 * to the access domain). 4376 */ 4377 if (obj->cache_dirty && 4378 obj->base.write_domain != I915_GEM_DOMAIN_CPU && 4379 cpu_write_needs_clflush(obj)) { 4380 if (i915_gem_clflush_object(obj, true)) 4381 i915_gem_chipset_flush(obj->base.dev); 4382 } 4383 4384 return 0; 4385 } 4386 4387 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4388 struct drm_file *file) 4389 { 4390 struct drm_i915_gem_caching *args = data; 4391 struct drm_gem_object *gobj; 4392 struct drm_i915_gem_object *obj; 4393 4394 gobj = drm_gem_object_lookup(dev, file, args->handle); 4395 if (gobj == NULL) 4396 return -ENOENT; 4397 obj = to_intel_bo(gobj); 4398 4399 switch (obj->cache_level) { 4400 case I915_CACHE_LLC: 4401 case I915_CACHE_L3_LLC: 4402 args->caching = I915_CACHING_CACHED; 4403 break; 4404 4405 case I915_CACHE_WT: 4406 args->caching = I915_CACHING_DISPLAY; 4407 break; 4408 4409 default: 4410 args->caching = I915_CACHING_NONE; 4411 break; 4412 } 4413 4414 drm_gem_object_unreference_unlocked(&obj->base); 4415 return 0; 4416 } 4417 4418 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4419 struct drm_file *file) 4420 { 4421 struct drm_i915_private *dev_priv = dev->dev_private; 4422 struct drm_i915_gem_caching *args = data; 4423 struct drm_gem_object *gobj; 4424 struct drm_i915_gem_object *obj; 4425 enum i915_cache_level level; 4426 int ret; 4427 4428 switch (args->caching) { 4429 case I915_CACHING_NONE: 4430 level = I915_CACHE_NONE; 4431 break; 4432 case I915_CACHING_CACHED: 4433 /* 4434 * Due to a HW issue on BXT A stepping, GPU stores via a 4435 * snooped mapping may leave stale data in a corresponding CPU 4436 * cacheline, whereas normally such cachelines would get 4437 * invalidated. 4438 */ 4439 if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) 4440 return -ENODEV; 4441 4442 level = I915_CACHE_LLC; 4443 break; 4444 case I915_CACHING_DISPLAY: 4445 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE; 4446 break; 4447 default: 4448 return -EINVAL; 4449 } 4450 4451 intel_runtime_pm_get(dev_priv); 4452 4453 ret = i915_mutex_lock_interruptible(dev); 4454 if (ret) 4455 goto rpm_put; 4456 4457 gobj = drm_gem_object_lookup(dev, file, args->handle); 4458 if (gobj == NULL) { 4459 ret = -ENOENT; 4460 goto unlock; 4461 } 4462 obj = to_intel_bo(gobj); 4463 4464 ret = i915_gem_object_set_cache_level(obj, level); 4465 4466 drm_gem_object_unreference(&obj->base); 4467 unlock: 4468 mutex_unlock(&dev->struct_mutex); 4469 rpm_put: 4470 intel_runtime_pm_put(dev_priv); 4471 4472 return ret; 4473 } 4474 4475 /* 4476 * Prepare buffer for display plane (scanout, cursors, etc). 4477 * Can be called from an uninterruptible phase (modesetting) and allows 4478 * any flushes to be pipelined (for pageflips). 
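 *
 * The object is synchronised with any outstanding rendering, switched to an
 * uncached (or write-through, where supported) cache level, and then pinned
 * into the GGTT; a normal view is pinned PIN_MAPPABLE so that the scanout
 * buffer remains map_and_fenceable.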
4479 */ 4480 int 4481 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4482 u32 alignment, 4483 struct intel_engine_cs *pipelined, 4484 struct drm_i915_gem_request **pipelined_request, 4485 const struct i915_ggtt_view *view) 4486 { 4487 u32 old_read_domains, old_write_domain; 4488 int ret; 4489 4490 ret = i915_gem_object_sync(obj, pipelined, pipelined_request); 4491 if (ret) 4492 return ret; 4493 4494 /* Mark the pin_display early so that we account for the 4495 * display coherency whilst setting up the cache domains. 4496 */ 4497 obj->pin_display++; 4498 4499 /* The display engine is not coherent with the LLC cache on gen6. As 4500 * a result, we make sure that the pinning that is about to occur is 4501 * done with uncached PTEs. This is lowest common denominator for all 4502 * chipsets. 4503 * 4504 * However for gen6+, we could do better by using the GFDT bit instead 4505 * of uncaching, which would allow us to flush all the LLC-cached data 4506 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4507 */ 4508 ret = i915_gem_object_set_cache_level(obj, 4509 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); 4510 if (ret) 4511 goto err_unpin_display; 4512 4513 /* As the user may map the buffer once pinned in the display plane 4514 * (e.g. libkms for the bootup splash), we have to ensure that we 4515 * always use map_and_fenceable for all scanout buffers. 4516 */ 4517 ret = i915_gem_object_ggtt_pin(obj, view, alignment, 4518 view->type == I915_GGTT_VIEW_NORMAL ? 4519 PIN_MAPPABLE : 0); 4520 if (ret) 4521 goto err_unpin_display; 4522 4523 i915_gem_object_flush_cpu_write_domain(obj); 4524 4525 old_write_domain = obj->base.write_domain; 4526 old_read_domains = obj->base.read_domains; 4527 4528 /* It should now be out of any other write domains, and we can update 4529 * the domain values for our changes. 4530 */ 4531 obj->base.write_domain = 0; 4532 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 4533 4534 trace_i915_gem_object_change_domain(obj, 4535 old_read_domains, 4536 old_write_domain); 4537 4538 return 0; 4539 4540 err_unpin_display: 4541 obj->pin_display--; 4542 return ret; 4543 } 4544 4545 void 4546 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, 4547 const struct i915_ggtt_view *view) 4548 { 4549 if (WARN_ON(obj->pin_display == 0)) 4550 return; 4551 4552 i915_gem_object_ggtt_unpin_view(obj, view); 4553 4554 obj->pin_display--; 4555 } 4556 4557 /** 4558 * Moves a single object to the CPU read, and possibly write domain. 4559 * 4560 * This function returns when the move is complete, including waiting on 4561 * flushes to occur. 4562 */ 4563 int 4564 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4565 { 4566 uint32_t old_write_domain, old_read_domains; 4567 int ret; 4568 4569 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 4570 return 0; 4571 4572 ret = i915_gem_object_wait_rendering(obj, !write); 4573 if (ret) 4574 return ret; 4575 4576 i915_gem_object_flush_gtt_write_domain(obj); 4577 4578 old_write_domain = obj->base.write_domain; 4579 old_read_domains = obj->base.read_domains; 4580 4581 /* Flush the CPU cache if it's still invalid. */ 4582 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4583 i915_gem_clflush_object(obj, false); 4584 4585 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 4586 } 4587 4588 /* It should now be out of any other write domains, and we can update 4589 * the domain values for our changes. 
4590 */ 4591 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 4592 4593 /* If we're writing through the CPU, then the GPU read domains will 4594 * need to be invalidated at next use. 4595 */ 4596 if (write) { 4597 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4598 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4599 } 4600 4601 trace_i915_gem_object_change_domain(obj, 4602 old_read_domains, 4603 old_write_domain); 4604 4605 return 0; 4606 } 4607 4608 /* Throttle our rendering by waiting until the ring has completed our requests 4609 * emitted over 20 msec ago. 4610 * 4611 * Note that if we were to use the current jiffies each time around the loop, 4612 * we wouldn't escape the function with any frames outstanding if the time to 4613 * render a frame was over 20ms. 4614 * 4615 * This should get us reasonable parallelism between CPU and GPU but also 4616 * relatively low latency when blocking on a particular request to finish. 4617 */ 4618 static int 4619 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4620 { 4621 struct drm_i915_private *dev_priv = dev->dev_private; 4622 struct drm_i915_file_private *file_priv = file->driver_priv; 4623 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4624 struct drm_i915_gem_request *request, *target = NULL; 4625 unsigned reset_counter; 4626 int ret; 4627 4628 ret = i915_gem_wait_for_error(&dev_priv->gpu_error); 4629 if (ret) 4630 return ret; 4631 4632 ret = i915_gem_check_wedge(&dev_priv->gpu_error, false); 4633 if (ret) 4634 return ret; 4635 4636 spin_lock(&file_priv->mm.lock); 4637 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 4638 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4639 break; 4640 4641 /* 4642 * Note that the request might not have been submitted yet. 4643 * In which case emitted_jiffies will be zero. 
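 * Such requests are simply skipped; we only throttle against requests
 * that have actually been emitted to the ring.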
4644 */ 4645 if (!request->emitted_jiffies) 4646 continue; 4647 4648 target = request; 4649 } 4650 reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); 4651 if (target) 4652 i915_gem_request_reference(target); 4653 spin_unlock(&file_priv->mm.lock); 4654 4655 if (target == NULL) 4656 return 0; 4657 4658 ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); 4659 if (ret == 0) 4660 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 4661 4662 i915_gem_request_unreference__unlocked(target); 4663 4664 return ret; 4665 } 4666 4667 static bool 4668 i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) 4669 { 4670 struct drm_i915_gem_object *obj = vma->obj; 4671 4672 if (alignment && 4673 vma->node.start & (alignment - 1)) 4674 return true; 4675 4676 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) 4677 return true; 4678 4679 if (flags & PIN_OFFSET_BIAS && 4680 vma->node.start < (flags & PIN_OFFSET_MASK)) 4681 return true; 4682 4683 return false; 4684 } 4685 4686 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) 4687 { 4688 struct drm_i915_gem_object *obj = vma->obj; 4689 bool mappable, fenceable; 4690 u32 fence_size, fence_alignment; 4691 4692 fence_size = i915_gem_get_gtt_size(obj->base.dev, 4693 obj->base.size, 4694 obj->tiling_mode); 4695 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev, 4696 obj->base.size, 4697 obj->tiling_mode, 4698 true); 4699 4700 fenceable = (vma->node.size == fence_size && 4701 (vma->node.start & (fence_alignment - 1)) == 0); 4702 4703 mappable = (vma->node.start + fence_size <= 4704 to_i915(obj->base.dev)->gtt.mappable_end); 4705 4706 obj->map_and_fenceable = mappable && fenceable; 4707 } 4708 4709 static int 4710 i915_gem_object_do_pin(struct drm_i915_gem_object *obj, 4711 struct i915_address_space *vm, 4712 const struct i915_ggtt_view *ggtt_view, 4713 uint32_t alignment, 4714 uint64_t flags) 4715 { 4716 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 4717 struct i915_vma *vma; 4718 unsigned bound; 4719 int ret; 4720 4721 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base)) 4722 return -ENODEV; 4723 4724 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm))) 4725 return -EINVAL; 4726 4727 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE)) 4728 return -EINVAL; 4729 4730 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 4731 return -EINVAL; 4732 4733 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) : 4734 i915_gem_obj_to_vma(obj, vm); 4735 4736 if (IS_ERR(vma)) 4737 return PTR_ERR(vma); 4738 4739 if (vma) { 4740 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 4741 return -EBUSY; 4742 4743 if (i915_vma_misplaced(vma, alignment, flags)) { 4744 WARN(vma->pin_count, 4745 "bo is already pinned in %s with incorrect alignment:" 4746 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d," 4747 " obj->map_and_fenceable=%d\n", 4748 ggtt_view ? "ggtt" : "ppgtt", 4749 upper_32_bits(vma->node.start), 4750 lower_32_bits(vma->node.start), 4751 alignment, 4752 !!(flags & PIN_MAPPABLE), 4753 obj->map_and_fenceable); 4754 ret = i915_vma_unbind(vma); 4755 if (ret) 4756 return ret; 4757 4758 vma = NULL; 4759 } 4760 } 4761 4762 bound = vma ? 
vma->bound : 0; 4763 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { 4764 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, 4765 flags); 4766 if (IS_ERR(vma)) 4767 return PTR_ERR(vma); 4768 } else { 4769 ret = i915_vma_bind(vma, obj->cache_level, flags); 4770 if (ret) 4771 return ret; 4772 } 4773 4774 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL && 4775 (bound ^ vma->bound) & GLOBAL_BIND) { 4776 __i915_vma_set_map_and_fenceable(vma); 4777 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable); 4778 } 4779 4780 vma->pin_count++; 4781 return 0; 4782 } 4783 4784 int 4785 i915_gem_object_pin(struct drm_i915_gem_object *obj, 4786 struct i915_address_space *vm, 4787 uint32_t alignment, 4788 uint64_t flags) 4789 { 4790 return i915_gem_object_do_pin(obj, vm, 4791 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL, 4792 alignment, flags); 4793 } 4794 4795 int 4796 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4797 const struct i915_ggtt_view *view, 4798 uint32_t alignment, 4799 uint64_t flags) 4800 { 4801 if (WARN_ONCE(!view, "no view specified")) 4802 return -EINVAL; 4803 4804 return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view, 4805 alignment, flags | PIN_GLOBAL); 4806 } 4807 4808 void 4809 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, 4810 const struct i915_ggtt_view *view) 4811 { 4812 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 4813 4814 BUG_ON(!vma); 4815 WARN_ON(vma->pin_count == 0); 4816 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view)); 4817 4818 --vma->pin_count; 4819 } 4820 4821 int 4822 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4823 struct drm_file *file) 4824 { 4825 struct drm_i915_gem_busy *args = data; 4826 struct drm_gem_object *gobj; 4827 struct drm_i915_gem_object *obj; 4828 int ret; 4829 4830 ret = i915_mutex_lock_interruptible(dev); 4831 if (ret) 4832 return ret; 4833 4834 gobj = drm_gem_object_lookup(dev, file, args->handle); 4835 if (gobj == NULL) { 4836 ret = -ENOENT; 4837 goto unlock; 4838 } 4839 obj = to_intel_bo(gobj); 4840 4841 /* Count all active objects as busy, even if they are currently not used 4842 * by the gpu. Users of this interface expect objects to eventually 4843 * become non-busy without any further actions, therefore emit any 4844 * necessary flushes here. 
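 *
 * The returned busy value packs the per-ring active mask into its upper
 * 16 bits (the BUILD_BUG_ON below guarantees the mask fits) and, while a
 * write is still outstanding, the id of the last ring to write the object
 * into its lower bits.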
4845 */ 4846 ret = i915_gem_object_flush_active(obj); 4847 if (ret) 4848 goto unref; 4849 4850 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4851 args->busy = obj->active << 16; 4852 if (obj->last_write_req) 4853 args->busy |= obj->last_write_req->ring->id; 4854 4855 unref: 4856 drm_gem_object_unreference(&obj->base); 4857 unlock: 4858 mutex_unlock(&dev->struct_mutex); 4859 return ret; 4860 } 4861 4862 int 4863 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4864 struct drm_file *file_priv) 4865 { 4866 return i915_gem_ring_throttle(dev, file_priv); 4867 } 4868 4869 int 4870 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4871 struct drm_file *file_priv) 4872 { 4873 struct drm_i915_private *dev_priv = dev->dev_private; 4874 struct drm_i915_gem_madvise *args = data; 4875 struct drm_gem_object *gobj; 4876 struct drm_i915_gem_object *obj; 4877 int ret; 4878 4879 switch (args->madv) { 4880 case I915_MADV_DONTNEED: 4881 case I915_MADV_WILLNEED: 4882 break; 4883 default: 4884 return -EINVAL; 4885 } 4886 4887 ret = i915_mutex_lock_interruptible(dev); 4888 if (ret) 4889 return ret; 4890 4891 gobj = drm_gem_object_lookup(dev, file_priv, args->handle); 4892 if (gobj == NULL) { 4893 ret = -ENOENT; 4894 goto unlock; 4895 } 4896 obj = to_intel_bo(gobj); 4897 4898 if (i915_gem_obj_is_pinned(obj)) { 4899 ret = -EINVAL; 4900 goto out; 4901 } 4902 4903 if (obj->pages && 4904 obj->tiling_mode != I915_TILING_NONE && 4905 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 4906 if (obj->madv == I915_MADV_WILLNEED) 4907 i915_gem_object_unpin_pages(obj); 4908 if (args->madv == I915_MADV_WILLNEED) 4909 i915_gem_object_pin_pages(obj); 4910 } 4911 4912 if (obj->madv != __I915_MADV_PURGED) 4913 obj->madv = args->madv; 4914 4915 /* if the object is no longer attached, discard its backing storage */ 4916 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) 4917 i915_gem_object_truncate(obj); 4918 4919 args->retained = obj->madv != __I915_MADV_PURGED; 4920 4921 out: 4922 drm_gem_object_unreference(&obj->base); 4923 unlock: 4924 mutex_unlock(&dev->struct_mutex); 4925 return ret; 4926 } 4927 4928 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4929 const struct drm_i915_gem_object_ops *ops) 4930 { 4931 int i; 4932 4933 INIT_LIST_HEAD(&obj->global_list); 4934 for (i = 0; i < I915_NUM_RINGS; i++) 4935 INIT_LIST_HEAD(&obj->ring_list[i]); 4936 INIT_LIST_HEAD(&obj->obj_exec_link); 4937 INIT_LIST_HEAD(&obj->vma_list); 4938 INIT_LIST_HEAD(&obj->batch_pool_link); 4939 4940 obj->ops = ops; 4941 4942 obj->fence_reg = I915_FENCE_REG_NONE; 4943 obj->madv = I915_MADV_WILLNEED; 4944 4945 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4946 } 4947 4948 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4949 .get_pages = i915_gem_object_get_pages_gtt, 4950 .put_pages = i915_gem_object_put_pages_gtt, 4951 }; 4952 4953 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4954 size_t size) 4955 { 4956 #ifdef __NetBSD__ 4957 struct drm_i915_private *const dev_priv = dev->dev_private; 4958 #endif 4959 struct drm_i915_gem_object *obj; 4960 #ifndef __NetBSD__ 4961 struct address_space *mapping; 4962 gfp_t mask; 4963 #endif 4964 4965 obj = i915_gem_object_alloc(dev); 4966 if (obj == NULL) 4967 return NULL; 4968 4969 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4970 i915_gem_object_free(obj); 4971 return NULL; 4972 } 4973 4974 #ifdef __NetBSD__ 4975 uao_set_pgfl(obj->base.filp, dev_priv->gtt.pgfl); 4976 #else 4977 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 
4978 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4979 /* 965gm cannot relocate objects above 4GiB. */ 4980 mask &= ~__GFP_HIGHMEM; 4981 mask |= __GFP_DMA32; 4982 } 4983 4984 mapping = file_inode(obj->base.filp)->i_mapping; 4985 mapping_set_gfp_mask(mapping, mask); 4986 #endif 4987 4988 i915_gem_object_init(obj, &i915_gem_object_ops); 4989 4990 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4991 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4992 4993 if (HAS_LLC(dev)) { 4994 /* On some devices, we can have the GPU use the LLC (the CPU 4995 * cache) for about a 10% performance improvement 4996 * compared to uncached. Graphics requests other than 4997 * display scanout are coherent with the CPU in 4998 * accessing this cache. This means in this mode we 4999 * don't need to clflush on the CPU side, and on the 5000 * GPU side we only need to flush internal caches to 5001 * get data visible to the CPU. 5002 * 5003 * However, we maintain the display planes as UC, and so 5004 * need to rebind when first used as such. 5005 */ 5006 obj->cache_level = I915_CACHE_LLC; 5007 } else 5008 obj->cache_level = I915_CACHE_NONE; 5009 5010 trace_i915_gem_object_create(obj); 5011 5012 return obj; 5013 } 5014 5015 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 5016 { 5017 /* If we are the last user of the backing storage (be it shmemfs 5018 * pages or stolen etc), we know that the pages are going to be 5019 * immediately released. In this case, we can then skip copying 5020 * back the contents from the GPU. 5021 */ 5022 5023 if (obj->madv != I915_MADV_WILLNEED) 5024 return false; 5025 5026 if (obj->base.filp == NULL) 5027 return true; 5028 5029 /* At first glance, this looks racy, but then again so would be 5030 * userspace racing mmap against close. However, the first external 5031 * reference to the filp can only be obtained through the 5032 * i915_gem_mmap_ioctl() which safeguards us against the user 5033 * acquiring such a reference whilst we are in the middle of 5034 * freeing the object. 5035 */ 5036 #ifdef __NetBSD__ 5037 /* XXX This number might be a fencepost. */ 5038 return obj->base.filp->uo_refs == 1; 5039 #else 5040 return atomic_long_read(&obj->base.filp->f_count) == 1; 5041 #endif 5042 } 5043 5044 void i915_gem_free_object(struct drm_gem_object *gem_obj) 5045 { 5046 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 5047 struct drm_device *dev = obj->base.dev; 5048 struct drm_i915_private *dev_priv = dev->dev_private; 5049 struct i915_vma *vma, *next; 5050 5051 intel_runtime_pm_get(dev_priv); 5052 5053 trace_i915_gem_object_destroy(obj); 5054 5055 list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { 5056 int ret; 5057 5058 vma->pin_count = 0; 5059 ret = i915_vma_unbind(vma); 5060 if (WARN_ON(ret == -ERESTARTSYS)) { 5061 bool was_interruptible; 5062 5063 was_interruptible = dev_priv->mm.interruptible; 5064 dev_priv->mm.interruptible = false; 5065 5066 WARN_ON(i915_vma_unbind(vma)); 5067 5068 dev_priv->mm.interruptible = was_interruptible; 5069 } 5070 } 5071 5072 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up 5073 * before progressing. 
*/ 5074 if (obj->stolen) 5075 i915_gem_object_unpin_pages(obj); 5076 5077 WARN_ON(obj->frontbuffer_bits); 5078 5079 if (obj->pages && obj->madv == I915_MADV_WILLNEED && 5080 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && 5081 obj->tiling_mode != I915_TILING_NONE) 5082 i915_gem_object_unpin_pages(obj); 5083 5084 if (WARN_ON(obj->pages_pin_count)) 5085 obj->pages_pin_count = 0; 5086 if (discard_backing_storage(obj)) 5087 obj->madv = I915_MADV_DONTNEED; 5088 i915_gem_object_put_pages(obj); 5089 i915_gem_object_free_mmap_offset(obj); 5090 5091 BUG_ON(obj->pages); 5092 5093 if (obj->base.import_attach) 5094 drm_prime_gem_destroy(&obj->base, NULL); 5095 5096 if (obj->ops->release) 5097 obj->ops->release(obj); 5098 5099 drm_gem_object_release(&obj->base); 5100 i915_gem_info_remove_obj(dev_priv, obj->base.size); 5101 5102 kfree(obj->bit_17); 5103 i915_gem_object_free(obj); 5104 5105 intel_runtime_pm_put(dev_priv); 5106 } 5107 5108 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 5109 struct i915_address_space *vm) 5110 { 5111 struct i915_vma *vma; 5112 list_for_each_entry(vma, &obj->vma_list, vma_link) { 5113 if (i915_is_ggtt(vma->vm) && 5114 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5115 continue; 5116 if (vma->vm == vm) 5117 return vma; 5118 } 5119 return NULL; 5120 } 5121 5122 struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, 5123 const struct i915_ggtt_view *view) 5124 { 5125 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 5126 struct i915_vma *vma; 5127 5128 if (WARN_ONCE(!view, "no view specified")) 5129 return ERR_PTR(-EINVAL); 5130 5131 list_for_each_entry(vma, &obj->vma_list, vma_link) 5132 if (vma->vm == ggtt && 5133 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5134 return vma; 5135 return NULL; 5136 } 5137 5138 void i915_gem_vma_destroy(struct i915_vma *vma) 5139 { 5140 struct i915_address_space *vm = NULL; 5141 WARN_ON(vma->node.allocated); 5142 5143 /* Keep the vma as a placeholder in the execbuffer reservation lists */ 5144 if (!list_empty(&vma->exec_list)) 5145 return; 5146 5147 vm = vma->vm; 5148 5149 if (!i915_is_ggtt(vm)) 5150 i915_ppgtt_put(i915_vm_to_ppgtt(vm)); 5151 5152 list_del(&vma->vma_link); 5153 5154 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 5155 } 5156 5157 static void 5158 i915_gem_stop_ringbuffers(struct drm_device *dev) 5159 { 5160 struct drm_i915_private *dev_priv = dev->dev_private; 5161 struct intel_engine_cs *ring; 5162 int i; 5163 5164 for_each_ring(ring, dev_priv, i) 5165 dev_priv->gt.stop_ring(ring); 5166 } 5167 5168 int 5169 i915_gem_suspend(struct drm_device *dev) 5170 { 5171 struct drm_i915_private *dev_priv = dev->dev_private; 5172 int ret = 0; 5173 5174 mutex_lock(&dev->struct_mutex); 5175 ret = i915_gpu_idle(dev); 5176 if (ret) 5177 goto err; 5178 5179 i915_gem_retire_requests(dev); 5180 5181 i915_gem_stop_ringbuffers(dev); 5182 mutex_unlock(&dev->struct_mutex); 5183 5184 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 5185 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 5186 flush_delayed_work(&dev_priv->mm.idle_work); 5187 5188 /* Assert that we successfully flushed all the work and 5189 * reset the GPU back to its idle, low power state.
5190 */ 5191 WARN_ON(dev_priv->mm.busy); 5192 5193 return 0; 5194 5195 err: 5196 mutex_unlock(&dev->struct_mutex); 5197 return ret; 5198 } 5199 5200 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) 5201 { 5202 struct intel_engine_cs *ring = req->ring; 5203 struct drm_device *dev = ring->dev; 5204 struct drm_i915_private *dev_priv = dev->dev_private; 5205 u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); 5206 u32 *remap_info = dev_priv->l3_parity.remap_info[slice]; 5207 int i, ret; 5208 5209 if (!HAS_L3_DPF(dev) || !remap_info) 5210 return 0; 5211 5212 ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); 5213 if (ret) 5214 return ret; 5215 5216 /* 5217 * Note: We do not worry about the concurrent register cacheline hang 5218 * here because no other code should access these registers other than 5219 * at initialization time. 5220 */ 5221 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 5222 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 5223 intel_ring_emit(ring, reg_base + i); 5224 intel_ring_emit(ring, remap_info[i/4]); 5225 } 5226 5227 intel_ring_advance(ring); 5228 5229 return ret; 5230 } 5231 5232 void i915_gem_init_swizzling(struct drm_device *dev) 5233 { 5234 struct drm_i915_private *dev_priv = dev->dev_private; 5235 5236 if (INTEL_INFO(dev)->gen < 5 || 5237 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5238 return; 5239 5240 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5241 DISP_TILE_SURFACE_SWIZZLING); 5242 5243 if (IS_GEN5(dev)) 5244 return; 5245 5246 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5247 if (IS_GEN6(dev)) 5248 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5249 else if (IS_GEN7(dev)) 5250 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5251 else if (IS_GEN8(dev)) 5252 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5253 else 5254 BUG(); 5255 } 5256 5257 static void init_unused_ring(struct drm_device *dev, u32 base) 5258 { 5259 struct drm_i915_private *dev_priv = dev->dev_private; 5260 5261 I915_WRITE(RING_CTL(base), 0); 5262 I915_WRITE(RING_HEAD(base), 0); 5263 I915_WRITE(RING_TAIL(base), 0); 5264 I915_WRITE(RING_START(base), 0); 5265 } 5266 5267 static void init_unused_rings(struct drm_device *dev) 5268 { 5269 if (IS_I830(dev)) { 5270 init_unused_ring(dev, PRB1_BASE); 5271 init_unused_ring(dev, SRB0_BASE); 5272 init_unused_ring(dev, SRB1_BASE); 5273 init_unused_ring(dev, SRB2_BASE); 5274 init_unused_ring(dev, SRB3_BASE); 5275 } else if (IS_GEN2(dev)) { 5276 init_unused_ring(dev, SRB0_BASE); 5277 init_unused_ring(dev, SRB1_BASE); 5278 } else if (IS_GEN3(dev)) { 5279 init_unused_ring(dev, PRB1_BASE); 5280 init_unused_ring(dev, PRB2_BASE); 5281 } 5282 } 5283 5284 int i915_gem_init_rings(struct drm_device *dev) 5285 { 5286 struct drm_i915_private *dev_priv = dev->dev_private; 5287 int ret; 5288 5289 ret = intel_init_render_ring_buffer(dev); 5290 if (ret) 5291 return ret; 5292 5293 if (HAS_BSD(dev)) { 5294 ret = intel_init_bsd_ring_buffer(dev); 5295 if (ret) 5296 goto cleanup_render_ring; 5297 } 5298 5299 if (HAS_BLT(dev)) { 5300 ret = intel_init_blt_ring_buffer(dev); 5301 if (ret) 5302 goto cleanup_bsd_ring; 5303 } 5304 5305 if (HAS_VEBOX(dev)) { 5306 ret = intel_init_vebox_ring_buffer(dev); 5307 if (ret) 5308 goto cleanup_blt_ring; 5309 } 5310 5311 if (HAS_BSD2(dev)) { 5312 ret = intel_init_bsd2_ring_buffer(dev); 5313 if (ret) 5314 goto cleanup_vebox_ring; 5315 } 5316 5317 return 0; 5318 5319 cleanup_vebox_ring: 5320 intel_cleanup_ring_buffer(&dev_priv->ring[VECS]); 5321 
cleanup_blt_ring: 5322 intel_cleanup_ring_buffer(&dev_priv->ring[BCS]); 5323 cleanup_bsd_ring: 5324 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 5325 cleanup_render_ring: 5326 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 5327 5328 return ret; 5329 } 5330 5331 int 5332 i915_gem_init_hw(struct drm_device *dev) 5333 { 5334 struct drm_i915_private *dev_priv = dev->dev_private; 5335 struct intel_engine_cs *ring; 5336 int ret, i, j; 5337 5338 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 5339 return -EIO; 5340 5341 /* Double layer security blanket, see i915_gem_init() */ 5342 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5343 5344 if (dev_priv->ellc_size) 5345 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5346 5347 if (IS_HASWELL(dev)) 5348 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ? 5349 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5350 5351 if (HAS_PCH_NOP(dev)) { 5352 if (IS_IVYBRIDGE(dev)) { 5353 u32 temp = I915_READ(GEN7_MSG_CTL); 5354 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5355 I915_WRITE(GEN7_MSG_CTL, temp); 5356 } else if (INTEL_INFO(dev)->gen >= 7) { 5357 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5358 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5359 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5360 } 5361 } 5362 5363 i915_gem_init_swizzling(dev); 5364 5365 /* 5366 * At least 830 can leave some of the unused rings 5367 * "active" (ie. head != tail) after resume which 5368 * will prevent c3 entry. Makes sure all unused rings 5369 * are totally idle. 5370 */ 5371 init_unused_rings(dev); 5372 5373 BUG_ON(!dev_priv->ring[RCS].default_context); 5374 5375 ret = i915_ppgtt_init_hw(dev); 5376 if (ret) { 5377 DRM_ERROR("PPGTT enable HW failed %d\n", ret); 5378 goto out; 5379 } 5380 5381 /* Need to do basic initialisation of all rings first: */ 5382 for_each_ring(ring, dev_priv, i) { 5383 ret = ring->init_hw(ring); 5384 if (ret) 5385 goto out; 5386 } 5387 5388 /* We can't enable contexts until all firmware is loaded */ 5389 if (HAS_GUC_UCODE(dev)) { 5390 ret = intel_guc_ucode_load(dev); 5391 if (ret) { 5392 /* 5393 * If we got an error and GuC submission is enabled, map 5394 * the error to -EIO so the GPU will be declared wedged. 5395 * OTOH, if we didn't intend to use the GuC anyway, just 5396 * discard the error and carry on. 5397 */ 5398 DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret, 5399 i915.enable_guc_submission ? "" : 5400 " (ignored)"); 5401 ret = i915.enable_guc_submission ? 
-EIO : 0; 5402 if (ret) 5403 goto out; 5404 } 5405 } 5406 5407 /* 5408 * Increment the next seqno by 0x100 so we have a visible break 5409 * on re-initialisation 5410 */ 5411 ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100); 5412 if (ret) 5413 goto out; 5414 5415 /* Now it is safe to go back round and do everything else: */ 5416 for_each_ring(ring, dev_priv, i) { 5417 struct drm_i915_gem_request *req; 5418 5419 WARN_ON(!ring->default_context); 5420 5421 ret = i915_gem_request_alloc(ring, ring->default_context, &req); 5422 if (ret) { 5423 i915_gem_cleanup_ringbuffer(dev); 5424 goto out; 5425 } 5426 5427 if (ring->id == RCS) { 5428 for (j = 0; j < NUM_L3_SLICES(dev); j++) 5429 i915_gem_l3_remap(req, j); 5430 } 5431 5432 ret = i915_ppgtt_init_ring(req); 5433 if (ret && ret != -EIO) { 5434 DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); 5435 i915_gem_request_cancel(req); 5436 i915_gem_cleanup_ringbuffer(dev); 5437 goto out; 5438 } 5439 5440 ret = i915_gem_context_enable(req); 5441 if (ret && ret != -EIO) { 5442 DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); 5443 i915_gem_request_cancel(req); 5444 i915_gem_cleanup_ringbuffer(dev); 5445 goto out; 5446 } 5447 5448 i915_add_request_no_flush(req); 5449 } 5450 5451 out: 5452 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5453 return ret; 5454 } 5455 5456 int i915_gem_init(struct drm_device *dev) 5457 { 5458 struct drm_i915_private *dev_priv = dev->dev_private; 5459 int ret; 5460 5461 i915.enable_execlists = intel_sanitize_enable_execlists(dev, 5462 i915.enable_execlists); 5463 5464 idr_preload(GFP_KERNEL); /* gem context */ 5465 mutex_lock(&dev->struct_mutex); 5466 5467 if (IS_VALLEYVIEW(dev)) { 5468 /* VLVA0 (potential hack), BIOS isn't actually waking us */ 5469 I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ); 5470 if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) & 5471 VLV_GTLC_ALLOWWAKEACK), 10)) 5472 DRM_DEBUG_DRIVER("allow wake ack timed out\n"); 5473 } 5474 5475 if (!i915.enable_execlists) { 5476 dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission; 5477 dev_priv->gt.init_rings = i915_gem_init_rings; 5478 dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; 5479 dev_priv->gt.stop_ring = intel_stop_ring_buffer; 5480 } else { 5481 dev_priv->gt.execbuf_submit = intel_execlists_submission; 5482 dev_priv->gt.init_rings = intel_logical_rings_init; 5483 dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; 5484 dev_priv->gt.stop_ring = intel_logical_ring_stop; 5485 } 5486 5487 /* This is just a security blanket to placate dragons. 5488 * On some systems, we very sporadically observe that the first TLBs 5489 * used by the CS may be stale, despite us poking the TLB reset. If 5490 * we hold the forcewake during initialisation these problems 5491 * just magically go away. 5492 */ 5493 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5494 5495 ret = i915_gem_init_userptr(dev); 5496 if (ret) 5497 goto out_unlock; 5498 5499 i915_gem_init_global_gtt(dev); 5500 5501 ret = i915_gem_context_init(dev); 5502 if (ret) 5503 goto out_unlock; 5504 5505 ret = dev_priv->gt.init_rings(dev); 5506 if (ret) 5507 goto out_unlock; 5508 5509 ret = i915_gem_init_hw(dev); 5510 if (ret == -EIO) { 5511 /* Allow ring initialisation to fail by marking the GPU as 5512 * wedged. But we only want to do this where the GPU is angry, 5513 * for all other failure, such as an allocation failure, bail. 
5514 */ 5515 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n"); 5516 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); 5517 ret = 0; 5518 } 5519 5520 out_unlock: 5521 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5522 mutex_unlock(&dev->struct_mutex); 5523 idr_preload_end(); 5524 5525 return ret; 5526 } 5527 5528 void 5529 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 5530 { 5531 struct drm_i915_private *dev_priv = dev->dev_private; 5532 struct intel_engine_cs *ring; 5533 int i; 5534 5535 for_each_ring(ring, dev_priv, i) 5536 dev_priv->gt.cleanup_ring(ring); 5537 5538 if (i915.enable_execlists) 5539 /* 5540 * Neither the BIOS, ourselves or any other kernel 5541 * expects the system to be in execlists mode on startup, 5542 * so we need to reset the GPU back to legacy mode. 5543 */ 5544 intel_gpu_reset(dev); 5545 } 5546 5547 static void 5548 init_ring_lists(struct intel_engine_cs *ring) 5549 { 5550 INIT_LIST_HEAD(&ring->active_list); 5551 INIT_LIST_HEAD(&ring->request_list); 5552 } 5553 5554 void 5555 i915_gem_load(struct drm_device *dev) 5556 { 5557 struct drm_i915_private *dev_priv = dev->dev_private; 5558 int i; 5559 5560 dev_priv->objects = 5561 kmem_cache_create("i915_gem_object", 5562 sizeof(struct drm_i915_gem_object), 0, 5563 SLAB_HWCACHE_ALIGN, 5564 NULL); 5565 dev_priv->vmas = 5566 kmem_cache_create("i915_gem_vma", 5567 sizeof(struct i915_vma), 0, 5568 SLAB_HWCACHE_ALIGN, 5569 NULL); 5570 dev_priv->requests = 5571 kmem_cache_create("i915_gem_request", 5572 sizeof(struct drm_i915_gem_request), 0, 5573 SLAB_HWCACHE_ALIGN, 5574 NULL); 5575 5576 INIT_LIST_HEAD(&dev_priv->vm_list); 5577 INIT_LIST_HEAD(&dev_priv->context_list); 5578 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 5579 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 5580 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5581 for (i = 0; i < I915_NUM_RINGS; i++) 5582 init_ring_lists(&dev_priv->ring[i]); 5583 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 5584 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 5585 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 5586 i915_gem_retire_work_handler); 5587 INIT_DELAYED_WORK(&dev_priv->mm.idle_work, 5588 i915_gem_idle_work_handler); 5589 #ifdef __NetBSD__ 5590 spin_lock_init(&dev_priv->gpu_error.reset_lock); 5591 DRM_INIT_WAITQUEUE(&dev_priv->gpu_error.reset_queue, "i915errst"); 5592 #else 5593 init_waitqueue_head(&dev_priv->gpu_error.reset_queue); 5594 #endif 5595 5596 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 5597 5598 if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev)) 5599 dev_priv->num_fence_regs = 32; 5600 else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 5601 dev_priv->num_fence_regs = 16; 5602 else 5603 dev_priv->num_fence_regs = 8; 5604 5605 if (intel_vgpu_active(dev)) 5606 dev_priv->num_fence_regs = 5607 I915_READ(vgtif_reg(avail_rs.fence_num)); 5608 5609 /* 5610 * Set initial sequence number for requests. 5611 * Using this number allows the wraparound to happen early, 5612 * catching any obvious problems. 
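 * ((u32)~0 - 0x1100) is 0xffffeeff, so the 32-bit seqno space wraps after
 * roughly 0x1100 (4352) requests instead of only after ~4 billion.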
5613 */ 5614 dev_priv->next_seqno = ((u32)~0 - 0x1100); 5615 dev_priv->last_seqno = ((u32)~0 - 0x1101); 5616 5617 /* Initialize fence registers to zero */ 5618 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 5619 i915_gem_restore_fences(dev); 5620 5621 i915_gem_detect_bit_6_swizzle(dev); 5622 #ifdef __NetBSD__ 5623 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip"); 5624 spin_lock_init(&dev_priv->pending_flip_lock); 5625 #else 5626 init_waitqueue_head(&dev_priv->pending_flip_queue); 5627 #endif 5628 5629 dev_priv->mm.interruptible = true; 5630 5631 i915_gem_shrinker_init(dev_priv); 5632 mutex_init(&dev_priv->fb_tracking.lock); 5633 } 5634 5635 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 5636 { 5637 struct drm_i915_file_private *file_priv = file->driver_priv; 5638 5639 /* Clean up our request list when the client is going away, so that 5640 * later retire_requests won't dereference our soon-to-be-gone 5641 * file_priv. 5642 */ 5643 spin_lock(&file_priv->mm.lock); 5644 while (!list_empty(&file_priv->mm.request_list)) { 5645 struct drm_i915_gem_request *request; 5646 5647 request = list_first_entry(&file_priv->mm.request_list, 5648 struct drm_i915_gem_request, 5649 client_list); 5650 list_del(&request->client_list); 5651 request->file_priv = NULL; 5652 } 5653 spin_unlock(&file_priv->mm.lock); 5654 5655 if (!list_empty(&file_priv->rps.link)) { 5656 spin_lock(&to_i915(dev)->rps.client_lock); 5657 list_del(&file_priv->rps.link); 5658 spin_unlock(&to_i915(dev)->rps.client_lock); 5659 } 5660 } 5661 5662 int i915_gem_open(struct drm_device *dev, struct drm_file *file) 5663 { 5664 struct drm_i915_file_private *file_priv; 5665 int ret; 5666 5667 DRM_DEBUG_DRIVER("\n"); 5668 5669 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 5670 if (!file_priv) 5671 return -ENOMEM; 5672 5673 file->driver_priv = file_priv; 5674 file_priv->dev_priv = dev->dev_private; 5675 file_priv->file = file; 5676 INIT_LIST_HEAD(&file_priv->rps.link); 5677 5678 spin_lock_init(&file_priv->mm.lock); 5679 INIT_LIST_HEAD(&file_priv->mm.request_list); 5680 5681 ret = i915_gem_context_open(dev, file); 5682 if (ret) 5683 kfree(file_priv); 5684 5685 return ret; 5686 } 5687 5688 /** 5689 * i915_gem_track_fb - update frontbuffer tracking 5690 * @old: current GEM buffer for the frontbuffer slots 5691 * @new: new GEM buffer for the frontbuffer slots 5692 * @frontbuffer_bits: bitmask of frontbuffer slots 5693 * 5694 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them 5695 * from @old and setting them in @new. Both @old and @new can be NULL. 
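 *
 * For example, when the buffer scanned out by a plane is replaced, that
 * plane's bits are cleared on the outgoing object and set on its
 * replacement; the WARN_ONs below sanity-check that handover.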
5696 */ 5697 void i915_gem_track_fb(struct drm_i915_gem_object *old, 5698 struct drm_i915_gem_object *new, 5699 unsigned frontbuffer_bits) 5700 { 5701 if (old) { 5702 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex)); 5703 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits)); 5704 old->frontbuffer_bits &= ~frontbuffer_bits; 5705 } 5706 5707 if (new) { 5708 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex)); 5709 WARN_ON(new->frontbuffer_bits & frontbuffer_bits); 5710 new->frontbuffer_bits |= frontbuffer_bits; 5711 } 5712 } 5713 5714 /* All the new VM stuff */ 5715 u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, 5716 struct i915_address_space *vm) 5717 { 5718 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5719 struct i915_vma *vma; 5720 5721 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5722 5723 list_for_each_entry(vma, &o->vma_list, vma_link) { 5724 if (i915_is_ggtt(vma->vm) && 5725 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5726 continue; 5727 if (vma->vm == vm) 5728 return vma->node.start; 5729 } 5730 5731 WARN(1, "%s vma for this object not found.\n", 5732 i915_is_ggtt(vm) ? "global" : "ppgtt"); 5733 return -1; 5734 } 5735 5736 u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, 5737 const struct i915_ggtt_view *view) 5738 { 5739 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5740 struct i915_vma *vma; 5741 5742 list_for_each_entry(vma, &o->vma_list, vma_link) 5743 if (vma->vm == ggtt && 5744 i915_ggtt_view_equal(&vma->ggtt_view, view)) 5745 return vma->node.start; 5746 5747 WARN(1, "global vma for this object not found. (view=%u)\n", view->type); 5748 return -1; 5749 } 5750 5751 bool i915_gem_obj_bound(struct drm_i915_gem_object *o, 5752 struct i915_address_space *vm) 5753 { 5754 struct i915_vma *vma; 5755 5756 list_for_each_entry(vma, &o->vma_list, vma_link) { 5757 if (i915_is_ggtt(vma->vm) && 5758 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5759 continue; 5760 if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) 5761 return true; 5762 } 5763 5764 return false; 5765 } 5766 5767 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, 5768 const struct i915_ggtt_view *view) 5769 { 5770 struct i915_address_space *ggtt = i915_obj_to_ggtt(o); 5771 struct i915_vma *vma; 5772 5773 list_for_each_entry(vma, &o->vma_list, vma_link) 5774 if (vma->vm == ggtt && 5775 i915_ggtt_view_equal(&vma->ggtt_view, view) && 5776 drm_mm_node_allocated(&vma->node)) 5777 return true; 5778 5779 return false; 5780 } 5781 5782 bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o) 5783 { 5784 struct i915_vma *vma; 5785 5786 list_for_each_entry(vma, &o->vma_list, vma_link) 5787 if (drm_mm_node_allocated(&vma->node)) 5788 return true; 5789 5790 return false; 5791 } 5792 5793 unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, 5794 struct i915_address_space *vm) 5795 { 5796 struct drm_i915_private *dev_priv = o->base.dev->dev_private; 5797 struct i915_vma *vma; 5798 5799 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); 5800 5801 BUG_ON(list_empty(&o->vma_list)); 5802 5803 list_for_each_entry(vma, &o->vma_list, vma_link) { 5804 if (i915_is_ggtt(vma->vm) && 5805 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) 5806 continue; 5807 if (vma->vm == vm) 5808 return vma->node.size; 5809 } 5810 return 0; 5811 } 5812 5813 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) 5814 { 5815 struct i915_vma *vma; 5816 list_for_each_entry(vma, &obj->vma_list, vma_link) 5817 if (vma->pin_count > 0) 5818 return true; 5819 5820 return 
false; 5821 } 5822 5823 /* Allocate a new GEM object and fill it with the supplied data */ 5824 struct drm_i915_gem_object * 5825 i915_gem_object_create_from_data(struct drm_device *dev, 5826 const void *data, size_t size) 5827 { 5828 struct drm_i915_gem_object *obj; 5829 #ifdef __NetBSD__ 5830 struct iovec iov = { .iov_base = __UNCONST(data), .iov_len = size }; 5831 struct uio uio = { 5832 .uio_iov = &iov, 5833 .uio_iovcnt = 1, 5834 .uio_offset = 0, 5835 .uio_resid = size, 5836 .uio_rw = UIO_WRITE, 5837 }; 5838 #else 5839 struct sg_table *sg; 5840 #endif 5841 size_t bytes; 5842 int ret; 5843 5844 obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); 5845 if (IS_ERR_OR_NULL(obj)) 5846 return obj; 5847 5848 ret = i915_gem_object_set_to_cpu_domain(obj, true); 5849 if (ret) 5850 goto fail; 5851 5852 ret = i915_gem_object_get_pages(obj); 5853 if (ret) 5854 goto fail; 5855 5856 i915_gem_object_pin_pages(obj); 5857 #ifdef __NetBSD__ 5858 UIO_SETUP_SYSSPACE(&uio); 5859 /* XXX errno NetBSD->Linux */ 5860 ret = -ubc_uiomove(obj->base.filp, &uio, size, UVM_ADV_NORMAL, 5861 UBC_WRITE); 5862 if (ret) 5863 goto fail; 5864 bytes = size - uio.uio_resid; 5865 #else 5866 sg = obj->pages; 5867 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); 5868 #endif 5869 i915_gem_object_unpin_pages(obj); 5870 5871 if (WARN_ON(bytes != size)) { 5872 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); 5873 ret = -EFAULT; 5874 goto fail; 5875 } 5876 5877 return obj; 5878 5879 fail: 5880 drm_gem_object_unreference(&obj->base); 5881 return ERR_PTR(ret); 5882 } 5883