1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #ifdef __NetBSD__ 29 #if 0 /* XXX uvmhist option? */ 30 #include "opt_uvmhist.h" 31 #endif 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 36 #include <x86/machdep.h> /* x86_select_freelist */ 37 38 #include <uvm/uvm.h> 39 #include <uvm/uvm_extern.h> 40 #include <uvm/uvm_fault.h> 41 #include <uvm/uvm_page.h> 42 #include <uvm/uvm_pmap.h> 43 #include <uvm/uvm_prot.h> 44 #endif 45 46 #include <drm/drmP.h> 47 #include <drm/i915_drm.h> 48 #include "i915_drv.h" 49 #include "i915_trace.h" 50 #include "intel_drv.h" 51 #include <linux/shmem_fs.h> 52 #include <linux/slab.h> 53 #include <linux/swap.h> 54 #include <linux/pci.h> 55 #include <linux/dma-buf.h> 56 #include <linux/errno.h> 57 #include <linux/time.h> 58 #include <linux/err.h> 59 #include <asm/param.h> 60 61 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 62 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 63 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 64 unsigned alignment, 65 bool map_and_fenceable, 66 bool nonblocking); 67 static int i915_gem_phys_pwrite(struct drm_device *dev, 68 struct drm_i915_gem_object *obj, 69 struct drm_i915_gem_pwrite *args, 70 struct drm_file *file); 71 72 static void i915_gem_write_fence(struct drm_device *dev, int reg, 73 struct drm_i915_gem_object *obj); 74 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 75 struct drm_i915_fence_reg *fence, 76 bool enable); 77 78 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 79 struct shrink_control *sc); 80 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 81 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); 82 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 83 84 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 85 { 86 if (obj->tiling_mode) 87 i915_gem_release_mmap(obj); 88 89 /* As we do not have an associated fence register, we will force 90 * a tiling change if we ever need to acquire one. 
91 */ 92 obj->fence_dirty = false; 93 obj->fence_reg = I915_FENCE_REG_NONE; 94 } 95 96 /* some bookkeeping */ 97 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 98 size_t size) 99 { 100 dev_priv->mm.object_count++; 101 dev_priv->mm.object_memory += size; 102 } 103 104 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 105 size_t size) 106 { 107 dev_priv->mm.object_count--; 108 dev_priv->mm.object_memory -= size; 109 } 110 111 static int 112 i915_gem_wait_for_error(struct drm_device *dev) 113 { 114 struct drm_i915_private *dev_priv = dev->dev_private; 115 struct completion *x = &dev_priv->error_completion; 116 #ifndef __NetBSD__ 117 unsigned long flags; 118 #endif 119 int ret; 120 121 if (!atomic_read(&dev_priv->mm.wedged)) 122 return 0; 123 124 /* 125 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 126 * userspace. If it takes that long something really bad is going on and 127 * we should simply try to bail out and fail as gracefully as possible. 128 */ 129 ret = wait_for_completion_interruptible_timeout(x, 10*HZ); 130 if (ret == 0) { 131 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 132 return -EIO; 133 } else if (ret < 0) { 134 return ret; 135 } 136 137 if (atomic_read(&dev_priv->mm.wedged)) { 138 /* GPU is hung, bump the completion count to account for 139 * the token we just consumed so that we never hit zero and 140 * end up waiting upon a subsequent completion event that 141 * will never happen. 142 */ 143 #ifdef __NetBSD__ 144 /* XXX Hope it's not a problem that we might wake someone. */ 145 complete(x); 146 #else 147 spin_lock_irqsave(&x->wait.lock, flags); 148 x->done++; 149 spin_unlock_irqrestore(&x->wait.lock, flags); 150 #endif 151 } 152 return 0; 153 } 154 155 int i915_mutex_lock_interruptible(struct drm_device *dev) 156 { 157 int ret; 158 159 ret = i915_gem_wait_for_error(dev); 160 if (ret) 161 return ret; 162 163 ret = mutex_lock_interruptible(&dev->struct_mutex); 164 if (ret) 165 return ret; 166 167 WARN_ON(i915_verify_lists(dev)); 168 return 0; 169 } 170 171 static inline bool 172 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 173 { 174 return obj->gtt_space && !obj->active; 175 } 176 177 int 178 i915_gem_init_ioctl(struct drm_device *dev, void *data, 179 struct drm_file *file) 180 { 181 struct drm_i915_gem_init *args = data; 182 183 if (drm_core_check_feature(dev, DRIVER_MODESET)) 184 return -ENODEV; 185 186 if (args->gtt_start >= args->gtt_end || 187 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 188 return -EINVAL; 189 190 /* GEM with user mode setting was never supported on ilk and later. 
*/ 191 if (INTEL_INFO(dev)->gen >= 5) 192 return -ENODEV; 193 194 mutex_lock(&dev->struct_mutex); 195 i915_gem_init_global_gtt(dev, args->gtt_start, 196 args->gtt_end, args->gtt_end); 197 mutex_unlock(&dev->struct_mutex); 198 199 return 0; 200 } 201 202 int 203 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 204 struct drm_file *file) 205 { 206 struct drm_i915_private *dev_priv = dev->dev_private; 207 struct drm_i915_gem_get_aperture *args = data; 208 struct drm_i915_gem_object *obj; 209 size_t pinned; 210 211 pinned = 0; 212 mutex_lock(&dev->struct_mutex); 213 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) 214 if (obj->pin_count) 215 pinned += obj->gtt_space->size; 216 mutex_unlock(&dev->struct_mutex); 217 218 args->aper_size = dev_priv->mm.gtt_total; 219 args->aper_available_size = args->aper_size - pinned; 220 221 return 0; 222 } 223 224 static int 225 i915_gem_create(struct drm_file *file, 226 struct drm_device *dev, 227 uint64_t size, 228 uint32_t *handle_p) 229 { 230 struct drm_i915_gem_object *obj; 231 int ret; 232 u32 handle; 233 234 size = roundup(size, PAGE_SIZE); 235 if (size == 0) 236 return -EINVAL; 237 238 /* Allocate the new object */ 239 obj = i915_gem_alloc_object(dev, size); 240 if (obj == NULL) 241 return -ENOMEM; 242 243 ret = drm_gem_handle_create(file, &obj->base, &handle); 244 if (ret) { 245 drm_gem_object_release(&obj->base); 246 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 247 kfree(obj); 248 return ret; 249 } 250 251 /* drop reference from allocate - handle holds it now */ 252 drm_gem_object_unreference(&obj->base); 253 trace_i915_gem_object_create(obj); 254 255 *handle_p = handle; 256 return 0; 257 } 258 259 int 260 i915_gem_dumb_create(struct drm_file *file, 261 struct drm_device *dev, 262 struct drm_mode_create_dumb *args) 263 { 264 /* have to work out size/pitch and return them */ 265 #ifdef __NetBSD__ /* ALIGN already means something. */ 266 args->pitch = round_up(args->width * ((args->bpp + 7) / 8), 64); 267 #else 268 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 269 #endif 270 args->size = args->pitch * args->height; 271 return i915_gem_create(file, dev, 272 args->size, &args->handle); 273 } 274 275 int i915_gem_dumb_destroy(struct drm_file *file, 276 struct drm_device *dev, 277 uint32_t handle) 278 { 279 return drm_gem_handle_delete(file, handle); 280 } 281 282 /** 283 * Creates a new mm object and returns a handle to it. 
284 */ 285 int 286 i915_gem_create_ioctl(struct drm_device *dev, void *data, 287 struct drm_file *file) 288 { 289 struct drm_i915_gem_create *args = data; 290 291 return i915_gem_create(file, dev, 292 args->size, &args->handle); 293 } 294 295 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) 296 { 297 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 298 299 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 300 obj->tiling_mode != I915_TILING_NONE; 301 } 302 303 static inline int 304 __copy_to_user_swizzled(char __user *cpu_vaddr, 305 const char *gpu_vaddr, int gpu_offset, 306 int length) 307 { 308 int ret, cpu_offset = 0; 309 310 while (length > 0) { 311 #ifdef __NetBSD__ 312 int cacheline_end = round_up(gpu_offset + 1, 64); 313 #else 314 int cacheline_end = ALIGN(gpu_offset + 1, 64); 315 #endif 316 int this_length = min(cacheline_end - gpu_offset, length); 317 int swizzled_gpu_offset = gpu_offset ^ 64; 318 319 ret = __copy_to_user(cpu_vaddr + cpu_offset, 320 gpu_vaddr + swizzled_gpu_offset, 321 this_length); 322 if (ret) 323 return ret + length; 324 325 cpu_offset += this_length; 326 gpu_offset += this_length; 327 length -= this_length; 328 } 329 330 return 0; 331 } 332 333 static inline int 334 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 335 const char __user *cpu_vaddr, 336 int length) 337 { 338 int ret, cpu_offset = 0; 339 340 while (length > 0) { 341 #ifdef __NetBSD__ 342 int cacheline_end = round_up(gpu_offset + 1, 64); 343 #else 344 int cacheline_end = ALIGN(gpu_offset + 1, 64); 345 #endif 346 int this_length = min(cacheline_end - gpu_offset, length); 347 int swizzled_gpu_offset = gpu_offset ^ 64; 348 349 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 350 cpu_vaddr + cpu_offset, 351 this_length); 352 if (ret) 353 return ret + length; 354 355 cpu_offset += this_length; 356 gpu_offset += this_length; 357 length -= this_length; 358 } 359 360 return 0; 361 } 362 363 /* Per-page copy function for the shmem pread fastpath. 364 * Flushes invalid cachelines before reading the target if 365 * needs_clflush is set. */ 366 static int 367 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 368 char __user *user_data, 369 bool page_do_bit17_swizzling, bool needs_clflush) 370 { 371 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 372 return -EFAULT; 373 #else 374 char *vaddr; 375 int ret; 376 377 if (unlikely(page_do_bit17_swizzling)) 378 return -EINVAL; 379 380 vaddr = kmap_atomic(page); 381 if (needs_clflush) 382 drm_clflush_virt_range(vaddr + shmem_page_offset, 383 page_length); 384 ret = __copy_to_user_inatomic(user_data, 385 vaddr + shmem_page_offset, 386 page_length); 387 kunmap_atomic(vaddr); 388 389 return ret ? -EFAULT : 0; 390 #endif 391 } 392 393 static void 394 shmem_clflush_swizzled_range(char *addr, unsigned long length, 395 bool swizzled) 396 { 397 if (unlikely(swizzled)) { 398 unsigned long start = (unsigned long) addr; 399 unsigned long end = (unsigned long) addr + length; 400 401 /* For swizzling simply ensure that we always flush both 402 * channels. Lame, but simple and it works. Swizzled 403 * pwrite/pread is far from a hotpath - current userspace 404 * doesn't use it at all. 
*/ 405 start = round_down(start, 128); 406 end = round_up(end, 128); 407 408 drm_clflush_virt_range((void *)start, end - start); 409 } else { 410 drm_clflush_virt_range(addr, length); 411 } 412 413 } 414 415 /* Only difference to the fast-path function is that this can handle bit17 416 * and uses non-atomic copy and kmap functions. */ 417 static int 418 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 419 char __user *user_data, 420 bool page_do_bit17_swizzling, bool needs_clflush) 421 { 422 char *vaddr; 423 int ret; 424 425 vaddr = kmap(page); 426 if (needs_clflush) 427 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 428 page_length, 429 page_do_bit17_swizzling); 430 431 if (page_do_bit17_swizzling) 432 ret = __copy_to_user_swizzled(user_data, 433 vaddr, shmem_page_offset, 434 page_length); 435 else 436 ret = __copy_to_user(user_data, 437 vaddr + shmem_page_offset, 438 page_length); 439 kunmap(page); 440 441 return ret ? - EFAULT : 0; 442 } 443 444 static int 445 i915_gem_shmem_pread(struct drm_device *dev, 446 struct drm_i915_gem_object *obj, 447 struct drm_i915_gem_pread *args, 448 struct drm_file *file) 449 { 450 char __user *user_data; 451 ssize_t remain; 452 loff_t offset; 453 int shmem_page_offset, page_length, ret = 0; 454 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 455 int hit_slowpath = 0; 456 #ifndef __NetBSD__ /* XXX */ 457 int prefaulted = 0; 458 #endif 459 int needs_clflush = 0; 460 #ifndef __NetBSD__ 461 struct scatterlist *sg; 462 int i; 463 #endif 464 465 user_data = (char __user *) (uintptr_t) args->data_ptr; 466 remain = args->size; 467 468 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 469 470 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 471 /* If we're not in the cpu read domain, set ourself into the gtt 472 * read domain and manually flush cachelines (if required). This 473 * optimizes for the case when the gpu will dirty the data 474 * anyway again before the next pread happens. */ 475 if (obj->cache_level == I915_CACHE_NONE) 476 needs_clflush = 1; 477 if (obj->gtt_space) { 478 ret = i915_gem_object_set_to_gtt_domain(obj, false); 479 if (ret) 480 return ret; 481 } 482 } 483 484 ret = i915_gem_object_get_pages(obj); 485 if (ret) 486 return ret; 487 488 i915_gem_object_pin_pages(obj); 489 490 offset = args->offset; 491 492 #ifdef __NetBSD__ 493 /* 494 * XXX This is a big #ifdef with a lot of duplicated code, but 495 * factoring out the loop head -- which is all that 496 * substantially differs -- is probably more trouble than it's 497 * worth at the moment. 498 */ 499 while (0 < remain) { 500 /* Get the next page. */ 501 shmem_page_offset = offset_in_page(offset); 502 KASSERT(shmem_page_offset < PAGE_SIZE); 503 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 504 struct page *const page = i915_gem_object_get_page(obj, 505 atop(offset)); 506 507 /* Decide whether to swizzle bit 17. */ 508 page_do_bit17_swizzling = obj_do_bit17_swizzling && 509 (page_to_phys(page) & (1 << 17)) != 0; 510 511 /* Try the fast path. */ 512 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 513 user_data, page_do_bit17_swizzling, needs_clflush); 514 if (ret == 0) 515 goto next_page; 516 517 /* Fast path failed. Try the slow path. 
*/ 518 hit_slowpath = 1; 519 mutex_unlock(&dev->struct_mutex); 520 /* XXX prefault */ 521 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 522 user_data, page_do_bit17_swizzling, needs_clflush); 523 mutex_lock(&dev->struct_mutex); 524 525 next_page: 526 /* XXX mark page accessed */ 527 if (ret) 528 goto out; 529 530 KASSERT(page_length <= remain); 531 remain -= page_length; 532 user_data += page_length; 533 offset += page_length; 534 } 535 #else 536 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 537 struct page *page; 538 539 if (i < offset >> PAGE_SHIFT) 540 continue; 541 542 if (remain <= 0) 543 break; 544 545 /* Operation in this page 546 * 547 * shmem_page_offset = offset within page in shmem file 548 * page_length = bytes to copy for this page 549 */ 550 shmem_page_offset = offset_in_page(offset); 551 page_length = remain; 552 if ((shmem_page_offset + page_length) > PAGE_SIZE) 553 page_length = PAGE_SIZE - shmem_page_offset; 554 555 page = sg_page(sg); 556 page_do_bit17_swizzling = obj_do_bit17_swizzling && 557 (page_to_phys(page) & (1 << 17)) != 0; 558 559 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 560 user_data, page_do_bit17_swizzling, 561 needs_clflush); 562 if (ret == 0) 563 goto next_page; 564 565 hit_slowpath = 1; 566 mutex_unlock(&dev->struct_mutex); 567 568 if (!prefaulted) { 569 ret = fault_in_multipages_writeable(user_data, remain); 570 /* Userspace is tricking us, but we've already clobbered 571 * its pages with the prefault and promised to write the 572 * data up to the first fault. Hence ignore any errors 573 * and just continue. */ 574 (void)ret; 575 prefaulted = 1; 576 } 577 578 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 579 user_data, page_do_bit17_swizzling, 580 needs_clflush); 581 582 mutex_lock(&dev->struct_mutex); 583 584 next_page: 585 mark_page_accessed(page); 586 587 if (ret) 588 goto out; 589 590 remain -= page_length; 591 user_data += page_length; 592 offset += page_length; 593 } 594 #endif 595 596 out: 597 i915_gem_object_unpin_pages(obj); 598 599 if (hit_slowpath) { 600 /* Fixup: Kill any reinstated backing storage pages */ 601 if (obj->madv == __I915_MADV_PURGED) 602 i915_gem_object_truncate(obj); 603 } 604 605 return ret; 606 } 607 608 /** 609 * Reads data from the object referenced by handle. 610 * 611 * On error, the contents of *data are undefined. 612 */ 613 int 614 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 615 struct drm_file *file) 616 { 617 struct drm_i915_gem_pread *args = data; 618 struct drm_i915_gem_object *obj; 619 int ret = 0; 620 621 if (args->size == 0) 622 return 0; 623 624 if (!access_ok(VERIFY_WRITE, 625 (char __user *)(uintptr_t)args->data_ptr, 626 args->size)) 627 return -EFAULT; 628 629 ret = i915_mutex_lock_interruptible(dev); 630 if (ret) 631 return ret; 632 633 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 634 if (&obj->base == NULL) { 635 ret = -ENOENT; 636 goto unlock; 637 } 638 639 /* Bounds check source. */ 640 if (args->offset > obj->base.size || 641 args->size > obj->base.size - args->offset) { 642 ret = -EINVAL; 643 goto out; 644 } 645 646 #ifndef __NetBSD__ /* XXX drm prime */ 647 /* prime objects have no backing filp to GEM pread/pwrite 648 * pages from. 
649 */ 650 if (!obj->base.filp) { 651 ret = -EINVAL; 652 goto out; 653 } 654 #endif 655 656 trace_i915_gem_object_pread(obj, args->offset, args->size); 657 658 ret = i915_gem_shmem_pread(dev, obj, args, file); 659 660 out: 661 drm_gem_object_unreference(&obj->base); 662 unlock: 663 mutex_unlock(&dev->struct_mutex); 664 return ret; 665 } 666 667 /* This is the fast write path which cannot handle 668 * page faults in the source data 669 */ 670 671 static inline int 672 fast_user_write(struct io_mapping *mapping, 673 loff_t page_base, int page_offset, 674 char __user *user_data, 675 int length) 676 { 677 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 678 return -EFAULT; 679 #else 680 void __iomem *vaddr_atomic; 681 void *vaddr; 682 unsigned long unwritten; 683 684 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 685 /* We can use the cpu mem copy function because this is X86. */ 686 vaddr = (void __force*)vaddr_atomic + page_offset; 687 unwritten = __copy_from_user_inatomic_nocache(vaddr, 688 user_data, length); 689 io_mapping_unmap_atomic(vaddr_atomic); 690 return unwritten; 691 #endif 692 } 693 694 /** 695 * This is the fast pwrite path, where we copy the data directly from the 696 * user into the GTT, uncached. 697 */ 698 static int 699 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 700 struct drm_i915_gem_object *obj, 701 struct drm_i915_gem_pwrite *args, 702 struct drm_file *file) 703 { 704 drm_i915_private_t *dev_priv = dev->dev_private; 705 ssize_t remain; 706 loff_t offset, page_base; 707 char __user *user_data; 708 int page_offset, page_length, ret; 709 710 ret = i915_gem_object_pin(obj, 0, true, true); 711 if (ret) 712 goto out; 713 714 ret = i915_gem_object_set_to_gtt_domain(obj, true); 715 if (ret) 716 goto out_unpin; 717 718 ret = i915_gem_object_put_fence(obj); 719 if (ret) 720 goto out_unpin; 721 722 user_data = (char __user *) (uintptr_t) args->data_ptr; 723 remain = args->size; 724 725 offset = obj->gtt_offset + args->offset; 726 727 while (remain > 0) { 728 /* Operation in this page 729 * 730 * page_base = page offset within aperture 731 * page_offset = offset within page 732 * page_length = bytes to copy for this page 733 */ 734 page_base = offset & PAGE_MASK; 735 page_offset = offset_in_page(offset); 736 page_length = remain; 737 if ((page_offset + remain) > PAGE_SIZE) 738 page_length = PAGE_SIZE - page_offset; 739 740 /* If we get a fault while copying data, then (presumably) our 741 * source page isn't available. Return the error and we'll 742 * retry in the slow path. 743 */ 744 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 745 page_offset, user_data, page_length)) { 746 ret = -EFAULT; 747 goto out_unpin; 748 } 749 750 remain -= page_length; 751 user_data += page_length; 752 offset += page_length; 753 } 754 755 out_unpin: 756 i915_gem_object_unpin(obj); 757 out: 758 return ret; 759 } 760 761 /* Per-page copy function for the shmem pwrite fastpath. 762 * Flushes invalid cachelines before writing to the target if 763 * needs_clflush_before is set and flushes out any written cachelines after 764 * writing if needs_clflush is set. 
*/ 765 static int 766 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 767 char __user *user_data, 768 bool page_do_bit17_swizzling, 769 bool needs_clflush_before, 770 bool needs_clflush_after) 771 { 772 #ifdef __NetBSD__ 773 return -EFAULT; 774 #else 775 char *vaddr; 776 int ret; 777 778 if (unlikely(page_do_bit17_swizzling)) 779 return -EINVAL; 780 781 vaddr = kmap_atomic(page); 782 if (needs_clflush_before) 783 drm_clflush_virt_range(vaddr + shmem_page_offset, 784 page_length); 785 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 786 user_data, 787 page_length); 788 if (needs_clflush_after) 789 drm_clflush_virt_range(vaddr + shmem_page_offset, 790 page_length); 791 kunmap_atomic(vaddr); 792 793 return ret ? -EFAULT : 0; 794 #endif 795 } 796 797 /* Only difference to the fast-path function is that this can handle bit17 798 * and uses non-atomic copy and kmap functions. */ 799 static int 800 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 801 char __user *user_data, 802 bool page_do_bit17_swizzling, 803 bool needs_clflush_before, 804 bool needs_clflush_after) 805 { 806 char *vaddr; 807 int ret; 808 809 vaddr = kmap(page); 810 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 811 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 812 page_length, 813 page_do_bit17_swizzling); 814 if (page_do_bit17_swizzling) 815 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 816 user_data, 817 page_length); 818 else 819 ret = __copy_from_user(vaddr + shmem_page_offset, 820 user_data, 821 page_length); 822 if (needs_clflush_after) 823 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 824 page_length, 825 page_do_bit17_swizzling); 826 kunmap(page); 827 828 return ret ? -EFAULT : 0; 829 } 830 831 static int 832 i915_gem_shmem_pwrite(struct drm_device *dev, 833 struct drm_i915_gem_object *obj, 834 struct drm_i915_gem_pwrite *args, 835 struct drm_file *file) 836 { 837 ssize_t remain; 838 loff_t offset; 839 char __user *user_data; 840 int shmem_page_offset, page_length, ret = 0; 841 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 842 int hit_slowpath = 0; 843 int needs_clflush_after = 0; 844 int needs_clflush_before = 0; 845 #ifndef __NetBSD__ 846 int i; 847 struct scatterlist *sg; 848 #endif 849 850 user_data = (char __user *) (uintptr_t) args->data_ptr; 851 remain = args->size; 852 853 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 854 855 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 856 /* If we're not in the cpu write domain, set ourself into the gtt 857 * write domain and manually flush cachelines (if required). This 858 * optimizes for the case when the gpu will use the data 859 * right away and we therefore have to clflush anyway. */ 860 if (obj->cache_level == I915_CACHE_NONE) 861 needs_clflush_after = 1; 862 if (obj->gtt_space) { 863 ret = i915_gem_object_set_to_gtt_domain(obj, true); 864 if (ret) 865 return ret; 866 } 867 } 868 /* Same trick applies for invalidate partially written cachelines before 869 * writing. */ 870 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) 871 && obj->cache_level == I915_CACHE_NONE) 872 needs_clflush_before = 1; 873 874 ret = i915_gem_object_get_pages(obj); 875 if (ret) 876 return ret; 877 878 i915_gem_object_pin_pages(obj); 879 880 offset = args->offset; 881 obj->dirty = 1; 882 883 #ifdef __NetBSD__ 884 while (0 < remain) { 885 /* Get the next page. 
*/ 886 shmem_page_offset = offset_in_page(offset); 887 KASSERT(shmem_page_offset < PAGE_SIZE); 888 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 889 struct page *const page = i915_gem_object_get_page(obj, 890 atop(offset)); 891 892 /* Decide whether to flush the cache or swizzle bit 17. */ 893 const bool partial_cacheline_write = needs_clflush_before && 894 ((shmem_page_offset | page_length) 895 & (cpu_info_primary.ci_cflush_lsize - 1)); 896 page_do_bit17_swizzling = obj_do_bit17_swizzling && 897 (page_to_phys(page) & (1 << 17)) != 0; 898 899 /* Try the fast path. */ 900 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 901 user_data, page_do_bit17_swizzling, 902 partial_cacheline_write, needs_clflush_after); 903 if (ret == 0) 904 goto next_page; 905 906 /* Fast path failed. Try the slow path. */ 907 hit_slowpath = 1; 908 mutex_unlock(&dev->struct_mutex); 909 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 910 user_data, page_do_bit17_swizzling, 911 partial_cacheline_write, needs_clflush_after); 912 mutex_lock(&dev->struct_mutex); 913 914 next_page: 915 page->p_vmp.flags &= ~PG_CLEAN; 916 /* XXX mark page accessed */ 917 if (ret) 918 goto out; 919 920 KASSERT(page_length <= remain); 921 remain -= page_length; 922 user_data += page_length; 923 offset += page_length; 924 } 925 #else 926 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 927 struct page *page; 928 int partial_cacheline_write; 929 930 if (i < offset >> PAGE_SHIFT) 931 continue; 932 933 if (remain <= 0) 934 break; 935 936 /* Operation in this page 937 * 938 * shmem_page_offset = offset within page in shmem file 939 * page_length = bytes to copy for this page 940 */ 941 shmem_page_offset = offset_in_page(offset); 942 943 page_length = remain; 944 if ((shmem_page_offset + page_length) > PAGE_SIZE) 945 page_length = PAGE_SIZE - shmem_page_offset; 946 947 /* If we don't overwrite a cacheline completely we need to be 948 * careful to have up-to-date data by first clflushing. Don't 949 * overcomplicate things and flush the entire patch. */ 950 partial_cacheline_write = needs_clflush_before && 951 ((shmem_page_offset | page_length) 952 & (boot_cpu_data.x86_clflush_size - 1)); 953 954 page = sg_page(sg); 955 page_do_bit17_swizzling = obj_do_bit17_swizzling && 956 (page_to_phys(page) & (1 << 17)) != 0; 957 958 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 959 user_data, page_do_bit17_swizzling, 960 partial_cacheline_write, 961 needs_clflush_after); 962 if (ret == 0) 963 goto next_page; 964 965 hit_slowpath = 1; 966 mutex_unlock(&dev->struct_mutex); 967 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 968 user_data, page_do_bit17_swizzling, 969 partial_cacheline_write, 970 needs_clflush_after); 971 972 mutex_lock(&dev->struct_mutex); 973 974 next_page: 975 set_page_dirty(page); 976 mark_page_accessed(page); 977 978 if (ret) 979 goto out; 980 981 remain -= page_length; 982 user_data += page_length; 983 offset += page_length; 984 } 985 #endif 986 987 out: 988 i915_gem_object_unpin_pages(obj); 989 990 if (hit_slowpath) { 991 /* Fixup: Kill any reinstated backing storage pages */ 992 if (obj->madv == __I915_MADV_PURGED) 993 i915_gem_object_truncate(obj); 994 /* and flush dirty cachelines in case the object isn't in the cpu write 995 * domain anymore. 
*/ 996 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 997 i915_gem_clflush_object(obj); 998 i915_gem_chipset_flush(dev); 999 } 1000 } 1001 1002 if (needs_clflush_after) 1003 i915_gem_chipset_flush(dev); 1004 1005 return ret; 1006 } 1007 1008 /** 1009 * Writes data to the object referenced by handle. 1010 * 1011 * On error, the contents of the buffer that were to be modified are undefined. 1012 */ 1013 int 1014 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1015 struct drm_file *file) 1016 { 1017 struct drm_i915_gem_pwrite *args = data; 1018 struct drm_i915_gem_object *obj; 1019 int ret; 1020 1021 if (args->size == 0) 1022 return 0; 1023 1024 if (!access_ok(VERIFY_READ, 1025 (char __user *)(uintptr_t)args->data_ptr, 1026 args->size)) 1027 return -EFAULT; 1028 1029 #ifndef __NetBSD__ /* XXX prefault */ 1030 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr, 1031 args->size); 1032 if (ret) 1033 return -EFAULT; 1034 #endif 1035 1036 ret = i915_mutex_lock_interruptible(dev); 1037 if (ret) 1038 return ret; 1039 1040 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1041 if (&obj->base == NULL) { 1042 ret = -ENOENT; 1043 goto unlock; 1044 } 1045 1046 /* Bounds check destination. */ 1047 if (args->offset > obj->base.size || 1048 args->size > obj->base.size - args->offset) { 1049 ret = -EINVAL; 1050 goto out; 1051 } 1052 1053 #ifndef __NetBSD__ /* XXX drm prime */ 1054 /* prime objects have no backing filp to GEM pread/pwrite 1055 * pages from. 1056 */ 1057 if (!obj->base.filp) { 1058 ret = -EINVAL; 1059 goto out; 1060 } 1061 #endif 1062 1063 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1064 1065 ret = -EFAULT; 1066 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1067 * it would end up going through the fenced access, and we'll get 1068 * different detiling behavior between reading and writing. 1069 * pread/pwrite currently are reading and writing from the CPU 1070 * perspective, requiring manual detiling by the client. 1071 */ 1072 if (obj->phys_obj) { 1073 ret = i915_gem_phys_pwrite(dev, obj, args, file); 1074 goto out; 1075 } 1076 1077 if (obj->cache_level == I915_CACHE_NONE && 1078 obj->tiling_mode == I915_TILING_NONE && 1079 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1080 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1081 /* Note that the gtt paths might fail with non-page-backed user 1082 * pointers (e.g. gtt mappings when moving data between 1083 * textures). Fallback to the shmem path in that case. */ 1084 } 1085 1086 if (ret == -EFAULT || ret == -ENOSPC) 1087 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1088 1089 out: 1090 drm_gem_object_unreference(&obj->base); 1091 unlock: 1092 mutex_unlock(&dev->struct_mutex); 1093 return ret; 1094 } 1095 1096 int 1097 i915_gem_check_wedge(struct drm_i915_private *dev_priv, 1098 bool interruptible) 1099 { 1100 if (atomic_read(&dev_priv->mm.wedged)) { 1101 struct completion *x = &dev_priv->error_completion; 1102 bool recovery_complete; 1103 #ifndef __NetBSD__ 1104 unsigned long flags; 1105 #endif 1106 1107 #ifdef __NetBSD__ 1108 /* 1109 * XXX This is a horrible kludge. Reading internal 1110 * fields is no good, nor is reading them unlocked, and 1111 * neither is locking it and then unlocking it before 1112 * making a decision. 1113 */ 1114 recovery_complete = x->c_done > 0; 1115 #else 1116 /* Give the error handler a chance to run. 
*/ 1117 spin_lock_irqsave(&x->wait.lock, flags); 1118 recovery_complete = x->done > 0; 1119 spin_unlock_irqrestore(&x->wait.lock, flags); 1120 #endif 1121 1122 /* Non-interruptible callers can't handle -EAGAIN, hence return 1123 * -EIO unconditionally for these. */ 1124 if (!interruptible) 1125 return -EIO; 1126 1127 /* Recovery complete, but still wedged means reset failure. */ 1128 if (recovery_complete) 1129 return -EIO; 1130 1131 return -EAGAIN; 1132 } 1133 1134 return 0; 1135 } 1136 1137 /* 1138 * Compare seqno against outstanding lazy request. Emit a request if they are 1139 * equal. 1140 */ 1141 static int 1142 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 1143 { 1144 int ret; 1145 1146 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1147 1148 ret = 0; 1149 if (seqno == ring->outstanding_lazy_request) 1150 ret = i915_add_request(ring, NULL, NULL); 1151 1152 return ret; 1153 } 1154 1155 /** 1156 * __wait_seqno - wait until execution of seqno has finished 1157 * @ring: the ring expected to report seqno 1158 * @seqno: duh! 1159 * @interruptible: do an interruptible wait (normally yes) 1160 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1161 * 1162 * Returns 0 if the seqno was found within the alloted time. Else returns the 1163 * errno with remaining time filled in timeout argument. 1164 */ 1165 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1166 bool interruptible, struct timespec *timeout) 1167 { 1168 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1169 struct timespec before, now, wait_time={1,0}; 1170 unsigned long timeout_jiffies; 1171 long end; 1172 bool wait_forever = true; 1173 int ret; 1174 1175 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1176 return 0; 1177 1178 trace_i915_gem_request_wait_begin(ring, seqno); 1179 1180 if (timeout != NULL) { 1181 wait_time = *timeout; 1182 wait_forever = false; 1183 } 1184 1185 timeout_jiffies = timespec_to_jiffies(&wait_time); 1186 1187 if (WARN_ON(!ring->irq_get(ring))) 1188 return -ENODEV; 1189 1190 /* Record current time in case interrupted by signal, or wedged * */ 1191 getrawmonotonic(&before); 1192 1193 #define EXIT_COND \ 1194 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ 1195 atomic_read(&dev_priv->mm.wedged)) 1196 do { 1197 #ifdef __NetBSD__ 1198 unsigned long flags; 1199 spin_lock_irqsave(&dev_priv->irq_lock, flags); 1200 if (interruptible) 1201 DRM_SPIN_TIMED_WAIT_UNTIL(end, &ring->irq_queue, 1202 &dev_priv->irq_lock, 1203 timeout_jiffies, 1204 EXIT_COND); 1205 else 1206 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(end, &ring->irq_queue, 1207 &dev_priv->irq_lock, 1208 timeout_jiffies, 1209 EXIT_COND); 1210 spin_unlock_irqrestore(&dev_priv->irq_lock, flags); 1211 #else 1212 if (interruptible) 1213 end = wait_event_interruptible_timeout(ring->irq_queue, 1214 EXIT_COND, 1215 timeout_jiffies); 1216 else 1217 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 1218 timeout_jiffies); 1219 1220 #endif 1221 ret = i915_gem_check_wedge(dev_priv, interruptible); 1222 if (ret) 1223 end = ret; 1224 } while (end == 0 && wait_forever); 1225 1226 getrawmonotonic(&now); 1227 1228 ring->irq_put(ring); 1229 trace_i915_gem_request_wait_end(ring, seqno); 1230 #undef EXIT_COND 1231 1232 if (timeout) { 1233 struct timespec sleep_time = timespec_sub(now, before); 1234 *timeout = timespec_sub(*timeout, sleep_time); 1235 } 1236 1237 switch (end) { 1238 case -EIO: 1239 case -EAGAIN: /* Wedged */ 1240 case -ERESTARTSYS: /* Signal */ 1241 case -EINTR: 1242 
return (int)end; 1243 case 0: /* Timeout */ 1244 if (timeout) 1245 set_normalized_timespec(timeout, 0, 0); 1246 return -ETIME; 1247 default: /* Completed */ 1248 WARN_ON(end < 0); /* We're not aware of other errors */ 1249 return 0; 1250 } 1251 } 1252 1253 /** 1254 * Waits for a sequence number to be signaled, and cleans up the 1255 * request and object lists appropriately for that event. 1256 */ 1257 int 1258 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) 1259 { 1260 struct drm_device *dev = ring->dev; 1261 struct drm_i915_private *dev_priv = dev->dev_private; 1262 bool interruptible = dev_priv->mm.interruptible; 1263 int ret; 1264 1265 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1266 BUG_ON(seqno == 0); 1267 1268 ret = i915_gem_check_wedge(dev_priv, interruptible); 1269 if (ret) 1270 return ret; 1271 1272 ret = i915_gem_check_olr(ring, seqno); 1273 if (ret) 1274 return ret; 1275 1276 return __wait_seqno(ring, seqno, interruptible, NULL); 1277 } 1278 1279 /** 1280 * Ensures that all rendering to the object has completed and the object is 1281 * safe to unbind from the GTT or access from the CPU. 1282 */ 1283 static __must_check int 1284 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1285 bool readonly) 1286 { 1287 struct intel_ring_buffer *ring = obj->ring; 1288 u32 seqno; 1289 int ret; 1290 1291 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1292 if (seqno == 0) 1293 return 0; 1294 1295 ret = i915_wait_seqno(ring, seqno); 1296 if (ret) 1297 return ret; 1298 1299 i915_gem_retire_requests_ring(ring); 1300 1301 /* Manually manage the write flush as we may have not yet 1302 * retired the buffer. 1303 */ 1304 if (obj->last_write_seqno && 1305 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1306 obj->last_write_seqno = 0; 1307 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1308 } 1309 1310 return 0; 1311 } 1312 1313 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1314 * as the object state may change during this call. 1315 */ 1316 static __must_check int 1317 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1318 bool readonly) 1319 { 1320 struct drm_device *dev = obj->base.dev; 1321 struct drm_i915_private *dev_priv = dev->dev_private; 1322 struct intel_ring_buffer *ring = obj->ring; 1323 u32 seqno; 1324 int ret; 1325 1326 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1327 BUG_ON(!dev_priv->mm.interruptible); 1328 1329 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1330 if (seqno == 0) 1331 return 0; 1332 1333 ret = i915_gem_check_wedge(dev_priv, true); 1334 if (ret) 1335 return ret; 1336 1337 ret = i915_gem_check_olr(ring, seqno); 1338 if (ret) 1339 return ret; 1340 1341 mutex_unlock(&dev->struct_mutex); 1342 ret = __wait_seqno(ring, seqno, true, NULL); 1343 mutex_lock(&dev->struct_mutex); 1344 1345 i915_gem_retire_requests_ring(ring); 1346 1347 /* Manually manage the write flush as we may have not yet 1348 * retired the buffer. 1349 */ 1350 if (obj->last_write_seqno && 1351 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1352 obj->last_write_seqno = 0; 1353 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1354 } 1355 1356 return ret; 1357 } 1358 1359 /** 1360 * Called when user space prepares to use an object with the CPU, either 1361 * through the mmap ioctl's mapping or a GTT mapping. 
1362 */ 1363 int 1364 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1365 struct drm_file *file) 1366 { 1367 struct drm_i915_gem_set_domain *args = data; 1368 struct drm_i915_gem_object *obj; 1369 uint32_t read_domains = args->read_domains; 1370 uint32_t write_domain = args->write_domain; 1371 int ret; 1372 1373 /* Only handle setting domains to types used by the CPU. */ 1374 if (write_domain & I915_GEM_GPU_DOMAINS) 1375 return -EINVAL; 1376 1377 if (read_domains & I915_GEM_GPU_DOMAINS) 1378 return -EINVAL; 1379 1380 /* Having something in the write domain implies it's in the read 1381 * domain, and only that read domain. Enforce that in the request. 1382 */ 1383 if (write_domain != 0 && read_domains != write_domain) 1384 return -EINVAL; 1385 1386 ret = i915_mutex_lock_interruptible(dev); 1387 if (ret) 1388 return ret; 1389 1390 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1391 if (&obj->base == NULL) { 1392 ret = -ENOENT; 1393 goto unlock; 1394 } 1395 1396 /* Try to flush the object off the GPU without holding the lock. 1397 * We will repeat the flush holding the lock in the normal manner 1398 * to catch cases where we are gazumped. 1399 */ 1400 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 1401 if (ret) 1402 goto unref; 1403 1404 if (read_domains & I915_GEM_DOMAIN_GTT) { 1405 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1406 1407 /* Silently promote "you're not bound, there was nothing to do" 1408 * to success, since the client was just asking us to 1409 * make sure everything was done. 1410 */ 1411 if (ret == -EINVAL) 1412 ret = 0; 1413 } else { 1414 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1415 } 1416 1417 unref: 1418 drm_gem_object_unreference(&obj->base); 1419 unlock: 1420 mutex_unlock(&dev->struct_mutex); 1421 return ret; 1422 } 1423 1424 /** 1425 * Called when user space has done writes to this buffer 1426 */ 1427 int 1428 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1429 struct drm_file *file) 1430 { 1431 struct drm_i915_gem_sw_finish *args = data; 1432 struct drm_i915_gem_object *obj; 1433 int ret = 0; 1434 1435 ret = i915_mutex_lock_interruptible(dev); 1436 if (ret) 1437 return ret; 1438 1439 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1440 if (&obj->base == NULL) { 1441 ret = -ENOENT; 1442 goto unlock; 1443 } 1444 1445 /* Pinned buffers may be scanout, so flush the cache */ 1446 if (obj->pin_count) 1447 i915_gem_object_flush_cpu_write_domain(obj); 1448 1449 drm_gem_object_unreference(&obj->base); 1450 unlock: 1451 mutex_unlock(&dev->struct_mutex); 1452 return ret; 1453 } 1454 1455 /** 1456 * Maps the contents of an object, returning the address it is mapped 1457 * into. 1458 * 1459 * While the mapping holds a reference on the contents of the object, it doesn't 1460 * imply a ref on the object itself. 1461 */ 1462 int 1463 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1464 struct drm_file *file) 1465 { 1466 struct drm_i915_gem_mmap *args = data; 1467 struct drm_gem_object *obj; 1468 unsigned long addr; 1469 #ifdef __NetBSD__ 1470 int ret; 1471 #endif 1472 1473 obj = drm_gem_object_lookup(dev, file, args->handle); 1474 if (obj == NULL) 1475 return -ENOENT; 1476 1477 #ifndef __NetBSD__ /* XXX drm prime */ 1478 /* prime objects have no backing filp to GEM mmap 1479 * pages from. 
1480 */ 1481 if (!obj->filp) { 1482 drm_gem_object_unreference_unlocked(obj); 1483 return -EINVAL; 1484 } 1485 #endif 1486 1487 #ifdef __NetBSD__ 1488 addr = (*curproc->p_emul->e_vm_default_addr)(curproc, 1489 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size); 1490 /* XXX errno NetBSD->Linux */ 1491 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size, 1492 obj->gemo_shm_uao, args->offset, 0, 1493 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE), 1494 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL, 1495 0)); 1496 if (ret) { 1497 drm_gem_object_unreference_unlocked(obj); 1498 return ret; 1499 } 1500 uao_reference(obj->gemo_shm_uao); 1501 drm_gem_object_unreference_unlocked(obj); 1502 #else 1503 addr = vm_mmap(obj->filp, 0, args->size, 1504 PROT_READ | PROT_WRITE, MAP_SHARED, 1505 args->offset); 1506 drm_gem_object_unreference_unlocked(obj); 1507 if (IS_ERR((void *)addr)) 1508 return addr; 1509 #endif 1510 1511 args->addr_ptr = (uint64_t) addr; 1512 1513 return 0; 1514 } 1515 1516 #ifdef __NetBSD__ /* XXX gem gtt fault */ 1517 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t, 1518 struct vm_page **, int, int, vm_prot_t, int, paddr_t); 1519 1520 int 1521 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1522 int npages, int centeridx, vm_prot_t access_type, int flags) 1523 { 1524 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1525 struct drm_gem_object *gem_obj = 1526 container_of(uobj, struct drm_gem_object, gemo_uvmobj); 1527 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 1528 struct drm_device *dev = obj->base.dev; 1529 struct drm_i915_private *dev_priv = dev->dev_private; 1530 voff_t byte_offset; 1531 pgoff_t page_offset; 1532 int ret = 0; 1533 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0; 1534 1535 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start)); 1536 KASSERT(byte_offset <= obj->base.size); 1537 page_offset = (byte_offset >> PAGE_SHIFT); 1538 1539 ret = i915_mutex_lock_interruptible(dev); 1540 if (ret) 1541 goto out; 1542 1543 trace_i915_gem_object_fault(obj, page_offset, true, write); 1544 1545 /* Now bind it into the GTT if needed */ 1546 ret = i915_gem_object_pin(obj, 0, true, false); 1547 if (ret) 1548 goto unlock; 1549 1550 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1551 if (ret) 1552 goto unpin; 1553 1554 ret = i915_gem_object_get_fence(obj); 1555 if (ret) 1556 goto unpin; 1557 1558 obj->fault_mappable = true; 1559 1560 /* Finally, remap it using the new GTT offset */ 1561 /* XXX errno NetBSD->Linux */ 1562 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type, 1563 flags, (dev_priv->mm.gtt_base_addr + obj->gtt_offset)); 1564 unpin: 1565 i915_gem_object_unpin(obj); 1566 unlock: 1567 mutex_unlock(&dev->struct_mutex); 1568 out: 1569 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); 1570 if (ret == -ERESTART) 1571 uvm_wait("i915flt"); 1572 return ret; 1573 } 1574 1575 /* 1576 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c. 1577 * 1578 * XXX pmap_enter_default instead of pmap_enter because of a problem 1579 * with using weak aliases in kernel modules or something. 
1580 */ 1581 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned); 1582 1583 static int 1584 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1585 int npages, int centeridx, vm_prot_t access_type, int flags, 1586 paddr_t gtt_paddr) 1587 { 1588 struct vm_map_entry *entry = ufi->entry; 1589 vaddr_t curr_va; 1590 off_t curr_offset; 1591 paddr_t paddr; 1592 u_int mmapflags; 1593 int lcv, retval; 1594 vm_prot_t mapprot; 1595 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist); 1596 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0); 1597 1598 /* 1599 * we do not allow device mappings to be mapped copy-on-write 1600 * so we kill any attempt to do so here. 1601 */ 1602 1603 if (UVM_ET_ISCOPYONWRITE(entry)) { 1604 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", 1605 entry->etype, 0,0,0); 1606 return(EIO); 1607 } 1608 1609 /* 1610 * now we must determine the offset in udv to use and the VA to 1611 * use for pmap_enter. note that we always use orig_map's pmap 1612 * for pmap_enter (even if we have a submap). since virtual 1613 * addresses in a submap must match the main map, this is ok. 1614 */ 1615 1616 /* udv offset = (offset from start of entry) + entry's offset */ 1617 curr_offset = entry->offset + (vaddr - entry->start); 1618 /* pmap va = vaddr (virtual address of pps[0]) */ 1619 curr_va = vaddr; 1620 1621 /* 1622 * loop over the page range entering in as needed 1623 */ 1624 1625 retval = 0; 1626 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, 1627 curr_va += PAGE_SIZE) { 1628 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 1629 continue; 1630 1631 if (pps[lcv] == PGO_DONTCARE) 1632 continue; 1633 1634 paddr = (gtt_paddr + curr_offset); 1635 mmapflags = 0; 1636 mapprot = ufi->entry->protection; 1637 UVMHIST_LOG(maphist, 1638 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d", 1639 ufi->orig_map->pmap, curr_va, paddr, mapprot); 1640 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot, 1641 PMAP_CANFAIL | mapprot | mmapflags) != 0) { 1642 /* 1643 * pmap_enter() didn't have the resource to 1644 * enter this mapping. Unlock everything, 1645 * wait for the pagedaemon to free up some 1646 * pages, and then tell uvm_fault() to start 1647 * the fault again. 1648 * 1649 * XXX Needs some rethinking for the PGO_ALLPAGES 1650 * XXX case. 1651 */ 1652 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ 1653 return (ERESTART); 1654 } 1655 } 1656 1657 pmap_update(ufi->orig_map->pmap); 1658 return (retval); 1659 } 1660 #else 1661 /** 1662 * i915_gem_fault - fault a page into the GTT 1663 * vma: VMA in question 1664 * vmf: fault info 1665 * 1666 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1667 * from userspace. The fault handler takes care of binding the object to 1668 * the GTT (if needed), allocating and programming a fence register (again, 1669 * only if needed based on whether the old reg is still valid or the object 1670 * is tiled) and inserting a new PTE into the faulting process. 1671 * 1672 * Note that the faulting process may involve evicting existing objects 1673 * from the GTT and/or fence registers to make room. So performance may 1674 * suffer if the GTT working set is large or there are few fence registers 1675 * left. 
1676 */ 1677 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1678 { 1679 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1680 struct drm_device *dev = obj->base.dev; 1681 drm_i915_private_t *dev_priv = dev->dev_private; 1682 pgoff_t page_offset; 1683 unsigned long pfn; 1684 int ret = 0; 1685 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1686 1687 /* We don't use vmf->pgoff since that has the fake offset */ 1688 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1689 PAGE_SHIFT; 1690 1691 ret = i915_mutex_lock_interruptible(dev); 1692 if (ret) 1693 goto out; 1694 1695 trace_i915_gem_object_fault(obj, page_offset, true, write); 1696 1697 /* Now bind it into the GTT if needed */ 1698 ret = i915_gem_object_pin(obj, 0, true, false); 1699 if (ret) 1700 goto unlock; 1701 1702 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1703 if (ret) 1704 goto unpin; 1705 1706 ret = i915_gem_object_get_fence(obj); 1707 if (ret) 1708 goto unpin; 1709 1710 obj->fault_mappable = true; 1711 1712 pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + 1713 page_offset; 1714 1715 /* Finally, remap it using the new GTT offset */ 1716 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1717 unpin: 1718 i915_gem_object_unpin(obj); 1719 unlock: 1720 mutex_unlock(&dev->struct_mutex); 1721 out: 1722 switch (ret) { 1723 case -EIO: 1724 /* If this -EIO is due to a gpu hang, give the reset code a 1725 * chance to clean up the mess. Otherwise return the proper 1726 * SIGBUS. */ 1727 if (!atomic_read(&dev_priv->mm.wedged)) 1728 return VM_FAULT_SIGBUS; 1729 case -EAGAIN: 1730 /* Give the error handler a chance to run and move the 1731 * objects off the GPU active list. Next time we service the 1732 * fault, we should be able to transition the page into the 1733 * GTT without touching the GPU (and so avoid further 1734 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1735 * with coherency, just lost writes. 1736 */ 1737 set_need_resched(); 1738 case 0: 1739 case -ERESTARTSYS: 1740 case -EINTR: 1741 case -EBUSY: 1742 /* 1743 * EBUSY is ok: this just means that another thread 1744 * already did the job. 1745 */ 1746 return VM_FAULT_NOPAGE; 1747 case -ENOMEM: 1748 return VM_FAULT_OOM; 1749 case -ENOSPC: 1750 return VM_FAULT_SIGBUS; 1751 default: 1752 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1753 return VM_FAULT_SIGBUS; 1754 } 1755 } 1756 #endif 1757 1758 /** 1759 * i915_gem_release_mmap - remove physical page mappings 1760 * @obj: obj in question 1761 * 1762 * Preserve the reservation of the mmapping with the DRM core code, but 1763 * relinquish ownership of the pages back to the system. 1764 * 1765 * It is vital that we remove the page mapping if we have mapped a tiled 1766 * object through the GTT and then lose the fence register due to 1767 * resource pressure. Similarly if the object has been moved out of the 1768 * aperture, than pages mapped into userspace must be revoked. Removing the 1769 * mapping will then trigger a page fault on the next user access, allowing 1770 * fixup by i915_gem_fault(). 1771 */ 1772 void 1773 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1774 { 1775 if (!obj->fault_mappable) 1776 return; 1777 1778 #ifdef __NetBSD__ /* XXX gem gtt fault */ 1779 { 1780 struct vm_page *page; 1781 1782 mutex_enter(obj->base.gemo_shm_uao->vmobjlock); 1783 KASSERT(obj->pages != NULL); 1784 /* Force a fresh fault for each page. 
*/ 1785 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) 1786 pmap_page_protect(page, VM_PROT_NONE); 1787 mutex_exit(obj->base.gemo_shm_uao->vmobjlock); 1788 } 1789 #else 1790 if (obj->base.dev->dev_mapping) 1791 unmap_mapping_range(obj->base.dev->dev_mapping, 1792 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, 1793 obj->base.size, 1); 1794 #endif 1795 1796 obj->fault_mappable = false; 1797 } 1798 1799 static uint32_t 1800 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1801 { 1802 uint32_t gtt_size; 1803 1804 if (INTEL_INFO(dev)->gen >= 4 || 1805 tiling_mode == I915_TILING_NONE) 1806 return size; 1807 1808 /* Previous chips need a power-of-two fence region when tiling */ 1809 if (INTEL_INFO(dev)->gen == 3) 1810 gtt_size = 1024*1024; 1811 else 1812 gtt_size = 512*1024; 1813 1814 while (gtt_size < size) 1815 gtt_size <<= 1; 1816 1817 return gtt_size; 1818 } 1819 1820 /** 1821 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1822 * @obj: object to check 1823 * 1824 * Return the required GTT alignment for an object, taking into account 1825 * potential fence register mapping. 1826 */ 1827 static uint32_t 1828 i915_gem_get_gtt_alignment(struct drm_device *dev, 1829 uint32_t size, 1830 int tiling_mode) 1831 { 1832 /* 1833 * Minimum alignment is 4k (GTT page size), but might be greater 1834 * if a fence register is needed for the object. 1835 */ 1836 if (INTEL_INFO(dev)->gen >= 4 || 1837 tiling_mode == I915_TILING_NONE) 1838 return 4096; 1839 1840 /* 1841 * Previous chips need to be aligned to the size of the smallest 1842 * fence register that can contain the object. 1843 */ 1844 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1845 } 1846 1847 /** 1848 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1849 * unfenced object 1850 * @dev: the device 1851 * @size: size of the object 1852 * @tiling_mode: tiling mode of the object 1853 * 1854 * Return the required GTT alignment for an object, only taking into account 1855 * unfenced tiled surface requirements. 1856 */ 1857 uint32_t 1858 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, 1859 uint32_t size, 1860 int tiling_mode) 1861 { 1862 /* 1863 * Minimum alignment is 4k (GTT page size) for sane hw. 1864 */ 1865 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1866 tiling_mode == I915_TILING_NONE) 1867 return 4096; 1868 1869 /* Previous hardware however needs to be aligned to a power-of-two 1870 * tile height. The simplest method for determining this is to reuse 1871 * the power-of-tile object size. 1872 */ 1873 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1874 } 1875 1876 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1877 { 1878 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1879 int ret; 1880 1881 if (obj->base.map_list.map) 1882 return 0; 1883 1884 dev_priv->mm.shrinker_no_lock_stealing = true; 1885 1886 ret = drm_gem_create_mmap_offset(&obj->base); 1887 if (ret != -ENOSPC) 1888 goto out; 1889 1890 /* Badly fragmented mmap space? The only way we can recover 1891 * space is by destroying unwanted objects. We can't randomly release 1892 * mmap_offsets as userspace expects them to be persistent for the 1893 * lifetime of the objects. The closest we can is to release the 1894 * offsets on purgeable objects by truncating it and marking it purged, 1895 * which prevents userspace from ever using that object again. 
1896 */ 1897 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1898 ret = drm_gem_create_mmap_offset(&obj->base); 1899 if (ret != -ENOSPC) 1900 goto out; 1901 1902 i915_gem_shrink_all(dev_priv); 1903 ret = drm_gem_create_mmap_offset(&obj->base); 1904 out: 1905 dev_priv->mm.shrinker_no_lock_stealing = false; 1906 1907 return ret; 1908 } 1909 1910 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1911 { 1912 if (!obj->base.map_list.map) 1913 return; 1914 1915 drm_gem_free_mmap_offset(&obj->base); 1916 } 1917 1918 int 1919 i915_gem_mmap_gtt(struct drm_file *file, 1920 struct drm_device *dev, 1921 uint32_t handle, 1922 uint64_t *offset) 1923 { 1924 struct drm_i915_private *dev_priv = dev->dev_private; 1925 struct drm_i915_gem_object *obj; 1926 int ret; 1927 1928 ret = i915_mutex_lock_interruptible(dev); 1929 if (ret) 1930 return ret; 1931 1932 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1933 if (&obj->base == NULL) { 1934 ret = -ENOENT; 1935 goto unlock; 1936 } 1937 1938 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1939 ret = -E2BIG; 1940 goto out; 1941 } 1942 1943 if (obj->madv != I915_MADV_WILLNEED) { 1944 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1945 ret = -EINVAL; 1946 goto out; 1947 } 1948 1949 ret = i915_gem_object_create_mmap_offset(obj); 1950 if (ret) 1951 goto out; 1952 1953 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1954 1955 out: 1956 drm_gem_object_unreference(&obj->base); 1957 unlock: 1958 mutex_unlock(&dev->struct_mutex); 1959 return ret; 1960 } 1961 1962 /** 1963 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1964 * @dev: DRM device 1965 * @data: GTT mapping ioctl data 1966 * @file: GEM object info 1967 * 1968 * Simply returns the fake offset to userspace so it can mmap it. 1969 * The mmap call will end up in drm_gem_mmap(), which will set things 1970 * up so we can get faults in the handler above. 1971 * 1972 * The fault handler will take care of binding the object into the GTT 1973 * (since it may have been evicted to make room for something), allocating 1974 * a fence register, and mapping the appropriate aperture address into 1975 * userspace. 1976 */ 1977 int 1978 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1979 struct drm_file *file) 1980 { 1981 struct drm_i915_gem_mmap_gtt *args = data; 1982 1983 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1984 } 1985 1986 /* Immediately discard the backing storage */ 1987 static void 1988 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1989 { 1990 #ifndef __NetBSD__ 1991 struct inode *inode; 1992 #endif 1993 1994 i915_gem_object_free_mmap_offset(obj); 1995 1996 #ifdef __NetBSD__ 1997 { 1998 struct uvm_object *const uobj = obj->base.gemo_shm_uao; 1999 2000 if (uobj != NULL) { 2001 /* XXX Calling pgo_put like this is bogus. */ 2002 mutex_enter(uobj->vmobjlock); 2003 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size, 2004 (PGO_ALLPAGES | PGO_FREE)); 2005 } 2006 } 2007 #else 2008 if (obj->base.filp == NULL) 2009 return; 2010 2011 /* Our goal here is to return as much of the memory as 2012 * is possible back to the system as we are called from OOM. 2013 * To do this we must instruct the shmfs to drop all of its 2014 * backing pages, *now*. 
2015 */ 2016 inode = obj->base.filp->f_path.dentry->d_inode; 2017 shmem_truncate_range(inode, 0, (loff_t)-1); 2018 #endif 2019 2020 obj->madv = __I915_MADV_PURGED; 2021 } 2022 2023 static inline int 2024 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2025 { 2026 return obj->madv == I915_MADV_DONTNEED; 2027 } 2028 2029 #ifdef __NetBSD__ 2030 static void 2031 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2032 { 2033 struct drm_device *const dev = obj->base.dev; 2034 int ret; 2035 2036 /* XXX Cargo-culted from the Linux code. */ 2037 BUG_ON(obj->madv == __I915_MADV_PURGED); 2038 2039 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2040 if (ret) { 2041 WARN_ON(ret != -EIO); 2042 i915_gem_clflush_object(obj); 2043 obj->base.read_domains = obj->base.write_domain = 2044 I915_GEM_DOMAIN_CPU; 2045 } 2046 2047 if (i915_gem_object_needs_bit17_swizzle(obj)) 2048 i915_gem_object_save_bit_17_swizzle(obj); 2049 2050 /* XXX Maintain dirty flag? */ 2051 2052 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap); 2053 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2054 obj->base.size, obj->pages, obj->igo_nsegs); 2055 2056 kfree(obj->pages); 2057 } 2058 #else 2059 static void 2060 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2061 { 2062 int page_count = obj->base.size / PAGE_SIZE; 2063 struct scatterlist *sg; 2064 int ret, i; 2065 2066 BUG_ON(obj->madv == __I915_MADV_PURGED); 2067 2068 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2069 if (ret) { 2070 /* In the event of a disaster, abandon all caches and 2071 * hope for the best. 2072 */ 2073 WARN_ON(ret != -EIO); 2074 i915_gem_clflush_object(obj); 2075 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2076 } 2077 2078 if (i915_gem_object_needs_bit17_swizzle(obj)) 2079 i915_gem_object_save_bit_17_swizzle(obj); 2080 2081 if (obj->madv == I915_MADV_DONTNEED) 2082 obj->dirty = 0; 2083 2084 for_each_sg(obj->pages->sgl, sg, page_count, i) { 2085 struct page *page = sg_page(sg); 2086 2087 if (obj->dirty) 2088 set_page_dirty(page); 2089 2090 if (obj->madv == I915_MADV_WILLNEED) 2091 mark_page_accessed(page); 2092 2093 page_cache_release(page); 2094 } 2095 obj->dirty = 0; 2096 2097 sg_free_table(obj->pages); 2098 kfree(obj->pages); 2099 } 2100 #endif 2101 2102 static int 2103 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2104 { 2105 const struct drm_i915_gem_object_ops *ops = obj->ops; 2106 2107 if (obj->pages == NULL) 2108 return 0; 2109 2110 BUG_ON(obj->gtt_space); 2111 2112 if (obj->pages_pin_count) 2113 return -EBUSY; 2114 2115 /* ->put_pages might need to allocate memory for the bit17 swizzle 2116 * array, hence protect them from being reaped by removing them from gtt 2117 * lists early. 
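 * (Were the object left on the list, that allocation could recurse into
 * the shrinker and attempt to reap this very object while its pages are
 * still being torn down.)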
*/ 2118 list_del(&obj->gtt_list); 2119 2120 ops->put_pages(obj); 2121 obj->pages = NULL; 2122 2123 if (i915_gem_object_is_purgeable(obj)) 2124 i915_gem_object_truncate(obj); 2125 2126 return 0; 2127 } 2128 2129 static long 2130 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 2131 bool purgeable_only) 2132 { 2133 struct drm_i915_gem_object *obj, *next; 2134 long count = 0; 2135 2136 list_for_each_entry_safe(obj, next, 2137 &dev_priv->mm.unbound_list, 2138 gtt_list) { 2139 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2140 i915_gem_object_put_pages(obj) == 0) { 2141 count += obj->base.size >> PAGE_SHIFT; 2142 if (count >= target) 2143 return count; 2144 } 2145 } 2146 2147 list_for_each_entry_safe(obj, next, 2148 &dev_priv->mm.inactive_list, 2149 mm_list) { 2150 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2151 i915_gem_object_unbind(obj) == 0 && 2152 i915_gem_object_put_pages(obj) == 0) { 2153 count += obj->base.size >> PAGE_SHIFT; 2154 if (count >= target) 2155 return count; 2156 } 2157 } 2158 2159 return count; 2160 } 2161 2162 static long 2163 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2164 { 2165 return __i915_gem_shrink(dev_priv, target, true); 2166 } 2167 2168 static void 2169 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2170 { 2171 struct drm_i915_gem_object *obj, *next; 2172 2173 i915_gem_evict_everything(dev_priv->dev); 2174 2175 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list) 2176 i915_gem_object_put_pages(obj); 2177 } 2178 2179 #ifdef __NetBSD__ 2180 static int 2181 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2182 { 2183 struct drm_device *const dev = obj->base.dev; 2184 struct vm_page *page; 2185 int error; 2186 2187 /* XXX Cargo-culted from the Linux code. */ 2188 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2189 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2190 2191 KASSERT(obj->pages == NULL); 2192 TAILQ_INIT(&obj->igo_pageq); 2193 obj->pages = kcalloc((obj->base.size / PAGE_SIZE), 2194 sizeof(obj->pages[0]), GFP_KERNEL); 2195 if (obj->pages == NULL) { 2196 error = -ENOMEM; 2197 goto fail0; 2198 } 2199 2200 /* XXX errno NetBSD->Linux */ 2201 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 2202 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages, 2203 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT); 2204 if (error) 2205 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */ 2206 goto fail1; 2207 KASSERT(0 < obj->igo_nsegs); 2208 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE)); 2209 2210 /* 2211 * Check that the paddrs will fit in 40 bits, or 32 bits on i965. 2212 * 2213 * XXX This is wrong; we ought to pass this constraint to 2214 * bus_dmamem_wire_uvm_object instead. 2215 */ 2216 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) { 2217 const uint64_t mask = 2218 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)? 2219 0xffffffffULL : 0xffffffffffULL); 2220 if (VM_PAGE_TO_PHYS(page) & ~mask) { 2221 DRM_ERROR("GEM physical address exceeds %u bits" 2222 ": %"PRIxMAX"\n", 2223 popcount64(mask), 2224 (uintmax_t)VM_PAGE_TO_PHYS(page)); 2225 error = -EIO; 2226 goto fail2; 2227 } 2228 } 2229 2230 /* XXX errno NetBSD->Linux */ 2231 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs, 2232 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap); 2233 if (error) 2234 goto fail2; 2235 2236 /* XXX Cargo-culted from the Linux code. 
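 * The swizzle fixup below mirrors the tail of the Linux
 * i915_gem_object_get_pages_gtt(): if bit-17 swizzling is in effect, the
 * object's contents have to be re-swizzled in case its backing pages now
 * sit at different physical addresses than when they were last saved.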
*/ 2237 if (i915_gem_object_needs_bit17_swizzle(obj)) 2238 i915_gem_object_do_bit_17_swizzle(obj); 2239 2240 /* Success! */ 2241 return 0; 2242 2243 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2244 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE)); 2245 fail1: kfree(obj->pages); 2246 obj->pages = NULL; 2247 fail0: KASSERT(error); 2248 return error; 2249 } 2250 #else 2251 static int 2252 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2253 { 2254 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2255 int page_count, i; 2256 struct address_space *mapping; 2257 struct sg_table *st; 2258 struct scatterlist *sg; 2259 struct page *page; 2260 gfp_t gfp; 2261 2262 /* Assert that the object is not currently in any GPU domain. As it 2263 * wasn't in the GTT, there shouldn't be any way it could have been in 2264 * a GPU cache 2265 */ 2266 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2267 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2268 2269 st = kmalloc(sizeof(*st), GFP_KERNEL); 2270 if (st == NULL) 2271 return -ENOMEM; 2272 2273 page_count = obj->base.size / PAGE_SIZE; 2274 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2275 sg_free_table(st); 2276 kfree(st); 2277 return -ENOMEM; 2278 } 2279 2280 /* Get the list of pages out of our struct file. They'll be pinned 2281 * at this point until we release them. 2282 * 2283 * Fail silently without starting the shrinker 2284 */ 2285 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 2286 gfp = mapping_gfp_mask(mapping); 2287 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2288 gfp &= ~(__GFP_IO | __GFP_WAIT); 2289 for_each_sg(st->sgl, sg, page_count, i) { 2290 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2291 if (IS_ERR(page)) { 2292 i915_gem_purge(dev_priv, page_count); 2293 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2294 } 2295 if (IS_ERR(page)) { 2296 /* We've tried hard to allocate the memory by reaping 2297 * our own buffer, now let the real VM do its job and 2298 * go down in flames if truly OOM. 2299 */ 2300 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 2301 gfp |= __GFP_IO | __GFP_WAIT; 2302 2303 i915_gem_shrink_all(dev_priv); 2304 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2305 if (IS_ERR(page)) 2306 goto err_pages; 2307 2308 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2309 gfp &= ~(__GFP_IO | __GFP_WAIT); 2310 } 2311 2312 sg_set_page(sg, page, PAGE_SIZE, 0); 2313 } 2314 2315 obj->pages = st; 2316 2317 if (i915_gem_object_needs_bit17_swizzle(obj)) 2318 i915_gem_object_do_bit_17_swizzle(obj); 2319 2320 return 0; 2321 2322 err_pages: 2323 for_each_sg(st->sgl, sg, i, page_count) 2324 page_cache_release(sg_page(sg)); 2325 sg_free_table(st); 2326 kfree(st); 2327 return PTR_ERR(page); 2328 } 2329 #endif 2330 2331 /* Ensure that the associated pages are gathered from the backing storage 2332 * and pinned into our object. i915_gem_object_get_pages() may be called 2333 * multiple times before they are released by a single call to 2334 * i915_gem_object_put_pages() - once the pages are no longer referenced 2335 * either as a result of memory pressure (reaping pages under the shrinker) 2336 * or as the object is itself released. 
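 *
 * Callers that need the pages to stay resident across a blocking
 * operation take an additional reference via i915_gem_object_pin_pages();
 * i915_gem_object_put_pages() bails out with -EBUSY for as long as
 * pages_pin_count is non-zero.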
2337 */ 2338 int 2339 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2340 { 2341 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2342 const struct drm_i915_gem_object_ops *ops = obj->ops; 2343 int ret; 2344 2345 if (obj->pages) 2346 return 0; 2347 2348 BUG_ON(obj->pages_pin_count); 2349 2350 ret = ops->get_pages(obj); 2351 if (ret) 2352 return ret; 2353 2354 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2355 return 0; 2356 } 2357 2358 void 2359 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2360 struct intel_ring_buffer *ring) 2361 { 2362 struct drm_device *dev = obj->base.dev; 2363 struct drm_i915_private *dev_priv = dev->dev_private; 2364 u32 seqno = intel_ring_get_seqno(ring); 2365 2366 BUG_ON(ring == NULL); 2367 obj->ring = ring; 2368 2369 /* Add a reference if we're newly entering the active list. */ 2370 if (!obj->active) { 2371 drm_gem_object_reference(&obj->base); 2372 obj->active = 1; 2373 } 2374 2375 /* Move from whatever list we were on to the tail of execution. */ 2376 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 2377 list_move_tail(&obj->ring_list, &ring->active_list); 2378 2379 obj->last_read_seqno = seqno; 2380 2381 if (obj->fenced_gpu_access) { 2382 obj->last_fenced_seqno = seqno; 2383 2384 /* Bump MRU to take account of the delayed flush */ 2385 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2386 struct drm_i915_fence_reg *reg; 2387 2388 reg = &dev_priv->fence_regs[obj->fence_reg]; 2389 list_move_tail(®->lru_list, 2390 &dev_priv->mm.fence_list); 2391 } 2392 } 2393 } 2394 2395 static void 2396 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2397 { 2398 struct drm_device *dev = obj->base.dev; 2399 struct drm_i915_private *dev_priv = dev->dev_private; 2400 2401 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2402 BUG_ON(!obj->active); 2403 2404 if (obj->pin_count) /* are we a framebuffer? */ 2405 intel_mark_fb_idle(obj); 2406 2407 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2408 2409 list_del_init(&obj->ring_list); 2410 obj->ring = NULL; 2411 2412 obj->last_read_seqno = 0; 2413 obj->last_write_seqno = 0; 2414 obj->base.write_domain = 0; 2415 2416 obj->last_fenced_seqno = 0; 2417 obj->fenced_gpu_access = false; 2418 2419 obj->active = 0; 2420 drm_gem_object_unreference(&obj->base); 2421 2422 WARN_ON(i915_verify_lists(dev)); 2423 } 2424 2425 static int 2426 i915_gem_handle_seqno_wrap(struct drm_device *dev) 2427 { 2428 struct drm_i915_private *dev_priv = dev->dev_private; 2429 struct intel_ring_buffer *ring; 2430 int ret, i, j; 2431 2432 /* The hardware uses various monotonic 32-bit counters, if we 2433 * detect that they will wraparound we need to idle the GPU 2434 * and reset those counters. 
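 * Idling the GPU first ensures no in-flight work can still observe a
 * pre-wrap seqno, after which each ring's sync_seqno[] is zeroed and the
 * caller restarts numbering at 1.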
2435 */ 2436 ret = 0; 2437 for_each_ring(ring, dev_priv, i) { 2438 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2439 ret |= ring->sync_seqno[j] != 0; 2440 } 2441 if (ret == 0) 2442 return ret; 2443 2444 ret = i915_gpu_idle(dev); 2445 if (ret) 2446 return ret; 2447 2448 i915_gem_retire_requests(dev); 2449 for_each_ring(ring, dev_priv, i) { 2450 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2451 ring->sync_seqno[j] = 0; 2452 } 2453 2454 return 0; 2455 } 2456 2457 int 2458 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2459 { 2460 struct drm_i915_private *dev_priv = dev->dev_private; 2461 2462 /* reserve 0 for non-seqno */ 2463 if (dev_priv->next_seqno == 0) { 2464 int ret = i915_gem_handle_seqno_wrap(dev); 2465 if (ret) 2466 return ret; 2467 2468 dev_priv->next_seqno = 1; 2469 } 2470 2471 *seqno = dev_priv->next_seqno++; 2472 return 0; 2473 } 2474 2475 int 2476 i915_add_request(struct intel_ring_buffer *ring, 2477 struct drm_file *file, 2478 u32 *out_seqno) 2479 { 2480 drm_i915_private_t *dev_priv = ring->dev->dev_private; 2481 struct drm_i915_gem_request *request; 2482 u32 request_ring_position; 2483 int was_empty; 2484 int ret; 2485 2486 /* 2487 * Emit any outstanding flushes - execbuf can fail to emit the flush 2488 * after having emitted the batchbuffer command. Hence we need to fix 2489 * things up similar to emitting the lazy request. The difference here 2490 * is that the flush _must_ happen before the next request, no matter 2491 * what. 2492 */ 2493 ret = intel_ring_flush_all_caches(ring); 2494 if (ret) 2495 return ret; 2496 2497 request = kmalloc(sizeof(*request), GFP_KERNEL); 2498 if (request == NULL) 2499 return -ENOMEM; 2500 2501 2502 /* Record the position of the start of the request so that 2503 * should we detect the updated seqno part-way through the 2504 * GPU processing the request, we never over-estimate the 2505 * position of the head. 
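 * (The recorded tail is later consumed by i915_gem_retire_requests_ring(),
 * which uses it to advance ring->last_retired_head once the request's
 * seqno has passed.)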
2506 */ 2507 request_ring_position = intel_ring_get_tail(ring); 2508 2509 ret = ring->add_request(ring); 2510 if (ret) { 2511 kfree(request); 2512 return ret; 2513 } 2514 2515 request->seqno = intel_ring_get_seqno(ring); 2516 request->ring = ring; 2517 request->tail = request_ring_position; 2518 request->emitted_jiffies = jiffies; 2519 was_empty = list_empty(&ring->request_list); 2520 list_add_tail(&request->list, &ring->request_list); 2521 request->file_priv = NULL; 2522 2523 if (file) { 2524 struct drm_i915_file_private *file_priv = file->driver_priv; 2525 2526 spin_lock(&file_priv->mm.lock); 2527 request->file_priv = file_priv; 2528 list_add_tail(&request->client_list, 2529 &file_priv->mm.request_list); 2530 spin_unlock(&file_priv->mm.lock); 2531 } 2532 2533 trace_i915_gem_request_add(ring, request->seqno); 2534 ring->outstanding_lazy_request = 0; 2535 2536 if (!dev_priv->mm.suspended) { 2537 if (i915_enable_hangcheck) { 2538 mod_timer(&dev_priv->hangcheck_timer, 2539 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); 2540 } 2541 if (was_empty) { 2542 queue_delayed_work(dev_priv->wq, 2543 &dev_priv->mm.retire_work, 2544 round_jiffies_up_relative(HZ)); 2545 intel_mark_busy(dev_priv->dev); 2546 } 2547 } 2548 2549 if (out_seqno) 2550 *out_seqno = request->seqno; 2551 return 0; 2552 } 2553 2554 static inline void 2555 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2556 { 2557 struct drm_i915_file_private *file_priv = request->file_priv; 2558 2559 if (!file_priv) 2560 return; 2561 2562 spin_lock(&file_priv->mm.lock); 2563 if (request->file_priv) { 2564 list_del(&request->client_list); 2565 request->file_priv = NULL; 2566 } 2567 spin_unlock(&file_priv->mm.lock); 2568 } 2569 2570 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 2571 struct intel_ring_buffer *ring) 2572 { 2573 while (!list_empty(&ring->request_list)) { 2574 struct drm_i915_gem_request *request; 2575 2576 request = list_first_entry(&ring->request_list, 2577 struct drm_i915_gem_request, 2578 list); 2579 2580 list_del(&request->list); 2581 i915_gem_request_remove_from_client(request); 2582 kfree(request); 2583 } 2584 2585 while (!list_empty(&ring->active_list)) { 2586 struct drm_i915_gem_object *obj; 2587 2588 obj = list_first_entry(&ring->active_list, 2589 struct drm_i915_gem_object, 2590 ring_list); 2591 2592 i915_gem_object_move_to_inactive(obj); 2593 } 2594 } 2595 2596 static void i915_gem_reset_fences(struct drm_device *dev) 2597 { 2598 struct drm_i915_private *dev_priv = dev->dev_private; 2599 int i; 2600 2601 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2602 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2603 2604 i915_gem_write_fence(dev, i, NULL); 2605 2606 if (reg->obj) 2607 i915_gem_object_fence_lost(reg->obj); 2608 2609 reg->pin_count = 0; 2610 reg->obj = NULL; 2611 INIT_LIST_HEAD(®->lru_list); 2612 } 2613 2614 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 2615 } 2616 2617 void i915_gem_reset(struct drm_device *dev) 2618 { 2619 struct drm_i915_private *dev_priv = dev->dev_private; 2620 struct drm_i915_gem_object *obj; 2621 struct intel_ring_buffer *ring; 2622 int i; 2623 2624 for_each_ring(ring, dev_priv, i) 2625 i915_gem_reset_ring_lists(dev_priv, ring); 2626 2627 /* Move everything out of the GPU domains to ensure we do any 2628 * necessary invalidation upon reuse. 
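 * (Clearing the GPU read domains here means the next domain transition
 * re-emits the cache invalidations that the hung GPU may never have
 * performed.)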
2629 */ 2630 list_for_each_entry(obj, 2631 &dev_priv->mm.inactive_list, 2632 mm_list) 2633 { 2634 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2635 } 2636 2637 /* The fence registers are invalidated so clear them out */ 2638 i915_gem_reset_fences(dev); 2639 } 2640 2641 /** 2642 * This function clears the request list as sequence numbers are passed. 2643 */ 2644 void 2645 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2646 { 2647 uint32_t seqno; 2648 2649 if (list_empty(&ring->request_list)) 2650 return; 2651 2652 WARN_ON(i915_verify_lists(ring->dev)); 2653 2654 seqno = ring->get_seqno(ring, true); 2655 2656 while (!list_empty(&ring->request_list)) { 2657 struct drm_i915_gem_request *request; 2658 2659 request = list_first_entry(&ring->request_list, 2660 struct drm_i915_gem_request, 2661 list); 2662 2663 if (!i915_seqno_passed(seqno, request->seqno)) 2664 break; 2665 2666 trace_i915_gem_request_retire(ring, request->seqno); 2667 /* We know the GPU must have read the request to have 2668 * sent us the seqno + interrupt, so use the position 2669 * of tail of the request to update the last known position 2670 * of the GPU head. 2671 */ 2672 ring->last_retired_head = request->tail; 2673 2674 list_del(&request->list); 2675 i915_gem_request_remove_from_client(request); 2676 kfree(request); 2677 } 2678 2679 /* Move any buffers on the active list that are no longer referenced 2680 * by the ringbuffer to the flushing/inactive lists as appropriate. 2681 */ 2682 while (!list_empty(&ring->active_list)) { 2683 struct drm_i915_gem_object *obj; 2684 2685 obj = list_first_entry(&ring->active_list, 2686 struct drm_i915_gem_object, 2687 ring_list); 2688 2689 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2690 break; 2691 2692 i915_gem_object_move_to_inactive(obj); 2693 } 2694 2695 if (unlikely(ring->trace_irq_seqno && 2696 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2697 ring->irq_put(ring); 2698 ring->trace_irq_seqno = 0; 2699 } 2700 2701 WARN_ON(i915_verify_lists(ring->dev)); 2702 } 2703 2704 void 2705 i915_gem_retire_requests(struct drm_device *dev) 2706 { 2707 drm_i915_private_t *dev_priv = dev->dev_private; 2708 struct intel_ring_buffer *ring; 2709 int i; 2710 2711 for_each_ring(ring, dev_priv, i) 2712 i915_gem_retire_requests_ring(ring); 2713 } 2714 2715 static void 2716 i915_gem_retire_work_handler(struct work_struct *work) 2717 { 2718 drm_i915_private_t *dev_priv; 2719 struct drm_device *dev; 2720 struct intel_ring_buffer *ring; 2721 bool idle; 2722 int i; 2723 2724 dev_priv = container_of(work, drm_i915_private_t, 2725 mm.retire_work.work); 2726 dev = dev_priv->dev; 2727 2728 /* Come back later if the device is busy... */ 2729 if (!mutex_trylock(&dev->struct_mutex)) { 2730 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2731 round_jiffies_up_relative(HZ)); 2732 return; 2733 } 2734 2735 i915_gem_retire_requests(dev); 2736 2737 /* Send a periodic flush down the ring so we don't hold onto GEM 2738 * objects indefinitely. 
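 * Any ring with dirty caches gets an empty request queued; if every
 * request list turns out to be empty we mark the device idle instead of
 * re-arming the delayed work.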
2739 */ 2740 idle = true; 2741 for_each_ring(ring, dev_priv, i) { 2742 if (ring->gpu_caches_dirty) 2743 i915_add_request(ring, NULL, NULL); 2744 2745 idle &= list_empty(&ring->request_list); 2746 } 2747 2748 if (!dev_priv->mm.suspended && !idle) 2749 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2750 round_jiffies_up_relative(HZ)); 2751 if (idle) 2752 intel_mark_idle(dev); 2753 2754 mutex_unlock(&dev->struct_mutex); 2755 } 2756 2757 /** 2758 * Ensures that an object will eventually get non-busy by flushing any required 2759 * write domains, emitting any outstanding lazy request and retiring and 2760 * completed requests. 2761 */ 2762 static int 2763 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2764 { 2765 int ret; 2766 2767 if (obj->active) { 2768 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2769 if (ret) 2770 return ret; 2771 2772 i915_gem_retire_requests_ring(obj->ring); 2773 } 2774 2775 return 0; 2776 } 2777 2778 /** 2779 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2780 * @DRM_IOCTL_ARGS: standard ioctl arguments 2781 * 2782 * Returns 0 if successful, else an error is returned with the remaining time in 2783 * the timeout parameter. 2784 * -ETIME: object is still busy after timeout 2785 * -ERESTARTSYS: signal interrupted the wait 2786 * -ENONENT: object doesn't exist 2787 * Also possible, but rare: 2788 * -EAGAIN: GPU wedged 2789 * -ENOMEM: damn 2790 * -ENODEV: Internal IRQ fail 2791 * -E?: The add request failed 2792 * 2793 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2794 * non-zero timeout parameter the wait ioctl will wait for the given number of 2795 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2796 * without holding struct_mutex the object may become re-busied before this 2797 * function completes. A similar but shorter * race condition exists in the busy 2798 * ioctl 2799 */ 2800 int 2801 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2802 { 2803 struct drm_i915_gem_wait *args = data; 2804 struct drm_i915_gem_object *obj; 2805 struct intel_ring_buffer *ring = NULL; 2806 struct timespec timeout_stack, *timeout = NULL; 2807 u32 seqno = 0; 2808 int ret = 0; 2809 2810 if (args->timeout_ns >= 0) { 2811 timeout_stack = ns_to_timespec(args->timeout_ns); 2812 timeout = &timeout_stack; 2813 } 2814 2815 ret = i915_mutex_lock_interruptible(dev); 2816 if (ret) 2817 return ret; 2818 2819 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2820 if (&obj->base == NULL) { 2821 mutex_unlock(&dev->struct_mutex); 2822 return -ENOENT; 2823 } 2824 2825 /* Need to make sure the object gets inactive eventually. 
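 * (i915_gem_object_flush_active() emits any outstanding lazy request and
 * retires the object's ring, so obj->active and last_read_seqno are
 * up to date before we sample them below.)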
*/ 2826 ret = i915_gem_object_flush_active(obj); 2827 if (ret) 2828 goto out; 2829 2830 if (obj->active) { 2831 seqno = obj->last_read_seqno; 2832 ring = obj->ring; 2833 } 2834 2835 if (seqno == 0) 2836 goto out; 2837 2838 /* Do this after OLR check to make sure we make forward progress polling 2839 * on this IOCTL with a 0 timeout (like busy ioctl) 2840 */ 2841 if (!args->timeout_ns) { 2842 ret = -ETIME; 2843 goto out; 2844 } 2845 2846 drm_gem_object_unreference(&obj->base); 2847 mutex_unlock(&dev->struct_mutex); 2848 2849 ret = __wait_seqno(ring, seqno, true, timeout); 2850 if (timeout) { 2851 WARN_ON(!timespec_valid(timeout)); 2852 args->timeout_ns = timespec_to_ns(timeout); 2853 } 2854 return ret; 2855 2856 out: 2857 drm_gem_object_unreference(&obj->base); 2858 mutex_unlock(&dev->struct_mutex); 2859 return ret; 2860 } 2861 2862 /** 2863 * i915_gem_object_sync - sync an object to a ring. 2864 * 2865 * @obj: object which may be in use on another ring. 2866 * @to: ring we wish to use the object on. May be NULL. 2867 * 2868 * This code is meant to abstract object synchronization with the GPU. 2869 * Calling with NULL implies synchronizing the object with the CPU 2870 * rather than a particular GPU ring. 2871 * 2872 * Returns 0 if successful, else propagates up the lower layer error. 2873 */ 2874 int 2875 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2876 struct intel_ring_buffer *to) 2877 { 2878 struct intel_ring_buffer *from = obj->ring; 2879 u32 seqno; 2880 int ret, idx; 2881 2882 if (from == NULL || to == from) 2883 return 0; 2884 2885 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2886 return i915_gem_object_wait_rendering(obj, false); 2887 2888 idx = intel_ring_sync_index(from, to); 2889 2890 seqno = obj->last_read_seqno; 2891 if (seqno <= from->sync_seqno[idx]) 2892 return 0; 2893 2894 ret = i915_gem_check_olr(obj->ring, seqno); 2895 if (ret) 2896 return ret; 2897 2898 ret = to->sync_to(to, from, seqno); 2899 if (!ret) 2900 /* We use last_read_seqno because sync_to() 2901 * might have just caused seqno wrap under 2902 * the radar. 2903 */ 2904 from->sync_seqno[idx] = obj->last_read_seqno; 2905 2906 return ret; 2907 } 2908 2909 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2910 { 2911 u32 old_write_domain, old_read_domains; 2912 2913 /* Act a barrier for all accesses through the GTT */ 2914 mb(); 2915 2916 /* Force a pagefault for domain tracking on next user access */ 2917 i915_gem_release_mmap(obj); 2918 2919 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2920 return; 2921 2922 old_read_domains = obj->base.read_domains; 2923 old_write_domain = obj->base.write_domain; 2924 2925 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2926 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2927 2928 trace_i915_gem_object_change_domain(obj, 2929 old_read_domains, 2930 old_write_domain); 2931 } 2932 2933 /** 2934 * Unbinds an object from the GTT aperture. 2935 */ 2936 int 2937 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2938 { 2939 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2940 int ret = 0; 2941 2942 if (obj->gtt_space == NULL) 2943 return 0; 2944 2945 if (obj->pin_count) 2946 return -EBUSY; 2947 2948 BUG_ON(obj->pages == NULL); 2949 2950 ret = i915_gem_object_finish_gpu(obj); 2951 if (ret) 2952 return ret; 2953 /* Continue on if we fail due to EIO, the GPU is hung so we 2954 * should be safe and we need to cleanup or else we might 2955 * cause memory corruption through use-after-free. 
2956 */ 2957 2958 i915_gem_object_finish_gtt(obj); 2959 2960 /* release the fence reg _after_ flushing */ 2961 ret = i915_gem_object_put_fence(obj); 2962 if (ret) 2963 return ret; 2964 2965 trace_i915_gem_object_unbind(obj); 2966 2967 if (obj->has_global_gtt_mapping) 2968 i915_gem_gtt_unbind_object(obj); 2969 if (obj->has_aliasing_ppgtt_mapping) { 2970 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2971 obj->has_aliasing_ppgtt_mapping = 0; 2972 } 2973 i915_gem_gtt_finish_object(obj); 2974 2975 list_del(&obj->mm_list); 2976 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2977 /* Avoid an unnecessary call to unbind on rebind. */ 2978 obj->map_and_fenceable = true; 2979 2980 drm_mm_put_block(obj->gtt_space); 2981 obj->gtt_space = NULL; 2982 obj->gtt_offset = 0; 2983 2984 return 0; 2985 } 2986 2987 int i915_gpu_idle(struct drm_device *dev) 2988 { 2989 drm_i915_private_t *dev_priv = dev->dev_private; 2990 struct intel_ring_buffer *ring; 2991 int ret, i; 2992 2993 /* Flush everything onto the inactive list. */ 2994 for_each_ring(ring, dev_priv, i) { 2995 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2996 if (ret) 2997 return ret; 2998 2999 ret = intel_ring_idle(ring); 3000 if (ret) 3001 return ret; 3002 } 3003 3004 return 0; 3005 } 3006 3007 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, 3008 struct drm_i915_gem_object *obj) 3009 { 3010 drm_i915_private_t *dev_priv = dev->dev_private; 3011 uint64_t val; 3012 3013 if (obj) { 3014 u32 size = obj->gtt_space->size; 3015 3016 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3017 0xfffff000) << 32; 3018 val |= obj->gtt_offset & 0xfffff000; 3019 val |= (uint64_t)((obj->stride / 128) - 1) << 3020 SANDYBRIDGE_FENCE_PITCH_SHIFT; 3021 3022 if (obj->tiling_mode == I915_TILING_Y) 3023 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3024 val |= I965_FENCE_REG_VALID; 3025 } else 3026 val = 0; 3027 3028 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); 3029 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); 3030 } 3031 3032 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3033 struct drm_i915_gem_object *obj) 3034 { 3035 drm_i915_private_t *dev_priv = dev->dev_private; 3036 uint64_t val; 3037 3038 if (obj) { 3039 u32 size = obj->gtt_space->size; 3040 3041 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3042 0xfffff000) << 32; 3043 val |= obj->gtt_offset & 0xfffff000; 3044 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 3045 if (obj->tiling_mode == I915_TILING_Y) 3046 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3047 val |= I965_FENCE_REG_VALID; 3048 } else 3049 val = 0; 3050 3051 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); 3052 POSTING_READ(FENCE_REG_965_0 + reg * 8); 3053 } 3054 3055 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3056 struct drm_i915_gem_object *obj) 3057 { 3058 drm_i915_private_t *dev_priv = dev->dev_private; 3059 u32 val; 3060 3061 if (obj) { 3062 u32 size = obj->gtt_space->size; 3063 int pitch_val; 3064 int tile_width; 3065 3066 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 3067 (size & -size) != size || 3068 (obj->gtt_offset & (size - 1)), 3069 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3070 obj->gtt_offset, obj->map_and_fenceable, size); 3071 3072 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3073 tile_width = 128; 3074 else 3075 tile_width = 512; 3076 3077 /* Note: pitch better be a power of two tile widths */ 3078 pitch_val = obj->stride / tile_width; 3079 pitch_val = ffs(pitch_val) - 1; 3080 3081 val = obj->gtt_offset; 3082 if (obj->tiling_mode == I915_TILING_Y) 3083 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3084 val |= I915_FENCE_SIZE_BITS(size); 3085 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3086 val |= I830_FENCE_REG_VALID; 3087 } else 3088 val = 0; 3089 3090 if (reg < 8) 3091 reg = FENCE_REG_830_0 + reg * 4; 3092 else 3093 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3094 3095 I915_WRITE(reg, val); 3096 POSTING_READ(reg); 3097 } 3098 3099 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3100 struct drm_i915_gem_object *obj) 3101 { 3102 drm_i915_private_t *dev_priv = dev->dev_private; 3103 uint32_t val; 3104 3105 if (obj) { 3106 u32 size = obj->gtt_space->size; 3107 uint32_t pitch_val; 3108 3109 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 3110 (size & -size) != size || 3111 (obj->gtt_offset & (size - 1)), 3112 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 3113 obj->gtt_offset, size); 3114 3115 pitch_val = obj->stride / 128; 3116 pitch_val = ffs(pitch_val) - 1; 3117 3118 val = obj->gtt_offset; 3119 if (obj->tiling_mode == I915_TILING_Y) 3120 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3121 val |= I830_FENCE_SIZE_BITS(size); 3122 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3123 val |= I830_FENCE_REG_VALID; 3124 } else 3125 val = 0; 3126 3127 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3128 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3129 } 3130 3131 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3132 struct drm_i915_gem_object *obj) 3133 { 3134 switch (INTEL_INFO(dev)->gen) { 3135 case 7: 3136 case 6: sandybridge_write_fence_reg(dev, reg, obj); break; 3137 case 5: 3138 case 4: i965_write_fence_reg(dev, reg, obj); break; 3139 case 3: i915_write_fence_reg(dev, reg, obj); break; 3140 case 2: i830_write_fence_reg(dev, reg, obj); break; 3141 default: break; 3142 } 3143 } 3144 3145 static inline int fence_number(struct drm_i915_private *dev_priv, 3146 struct drm_i915_fence_reg *fence) 3147 { 3148 return fence - dev_priv->fence_regs; 3149 } 3150 3151 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3152 struct drm_i915_fence_reg *fence, 3153 bool enable) 3154 { 3155 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3156 int reg = fence_number(dev_priv, fence); 3157 3158 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3159 3160 if (enable) { 3161 obj->fence_reg = reg; 3162 fence->obj = obj; 3163 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3164 } else { 3165 obj->fence_reg = I915_FENCE_REG_NONE; 3166 fence->obj = NULL; 3167 list_del_init(&fence->lru_list); 3168 } 3169 } 3170 3171 static int 3172 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 3173 { 3174 if (obj->last_fenced_seqno) { 3175 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3176 if (ret) 3177 return ret; 3178 3179 obj->last_fenced_seqno = 0; 3180 } 3181 3182 /* Ensure that all CPU reads are completed before installing a fence 3183 * and all writes before removing the fence. 
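 * The barrier below is only required when the object is currently
 * readable through the GTT; otherwise there is no CPU access that could
 * be reordered against the fence update.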
3184 */ 3185 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 3186 mb(); 3187 3188 obj->fenced_gpu_access = false; 3189 return 0; 3190 } 3191 3192 int 3193 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3194 { 3195 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3196 int ret; 3197 3198 ret = i915_gem_object_flush_fence(obj); 3199 if (ret) 3200 return ret; 3201 3202 if (obj->fence_reg == I915_FENCE_REG_NONE) 3203 return 0; 3204 3205 i915_gem_object_update_fence(obj, 3206 &dev_priv->fence_regs[obj->fence_reg], 3207 false); 3208 i915_gem_object_fence_lost(obj); 3209 3210 return 0; 3211 } 3212 3213 static struct drm_i915_fence_reg * 3214 i915_find_fence_reg(struct drm_device *dev) 3215 { 3216 struct drm_i915_private *dev_priv = dev->dev_private; 3217 struct drm_i915_fence_reg *reg, *avail; 3218 int i; 3219 3220 /* First try to find a free reg */ 3221 avail = NULL; 3222 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3223 reg = &dev_priv->fence_regs[i]; 3224 if (!reg->obj) 3225 return reg; 3226 3227 if (!reg->pin_count) 3228 avail = reg; 3229 } 3230 3231 if (avail == NULL) 3232 return NULL; 3233 3234 /* None available, try to steal one or wait for a user to finish */ 3235 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3236 if (reg->pin_count) 3237 continue; 3238 3239 return reg; 3240 } 3241 3242 return NULL; 3243 } 3244 3245 /** 3246 * i915_gem_object_get_fence - set up fencing for an object 3247 * @obj: object to map through a fence reg 3248 * 3249 * When mapping objects through the GTT, userspace wants to be able to write 3250 * to them without having to worry about swizzling if the object is tiled. 3251 * This function walks the fence regs looking for a free one for @obj, 3252 * stealing one if it can't find any. 3253 * 3254 * It then sets up the reg based on the object's properties: address, pitch 3255 * and tiling format. 3256 * 3257 * For an untiled surface, this removes any existing fence. 3258 */ 3259 int 3260 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3261 { 3262 struct drm_device *dev = obj->base.dev; 3263 struct drm_i915_private *dev_priv = dev->dev_private; 3264 bool enable = obj->tiling_mode != I915_TILING_NONE; 3265 struct drm_i915_fence_reg *reg; 3266 int ret; 3267 3268 /* Have we updated the tiling parameters upon the object and so 3269 * will need to serialise the write to the associated fence register? 3270 */ 3271 if (obj->fence_dirty) { 3272 ret = i915_gem_object_flush_fence(obj); 3273 if (ret) 3274 return ret; 3275 } 3276 3277 /* Just update our place in the LRU if our fence is getting reused. 
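 * Otherwise a tiled object without a fence steals the least recently
 * used unpinned register, first waiting for any access through the old
 * fence to drain; untiled objects simply keep no fence at all.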
*/ 3278 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3279 reg = &dev_priv->fence_regs[obj->fence_reg]; 3280 if (!obj->fence_dirty) { 3281 list_move_tail(®->lru_list, 3282 &dev_priv->mm.fence_list); 3283 return 0; 3284 } 3285 } else if (enable) { 3286 reg = i915_find_fence_reg(dev); 3287 if (reg == NULL) 3288 return -EDEADLK; 3289 3290 if (reg->obj) { 3291 struct drm_i915_gem_object *old = reg->obj; 3292 3293 ret = i915_gem_object_flush_fence(old); 3294 if (ret) 3295 return ret; 3296 3297 i915_gem_object_fence_lost(old); 3298 } 3299 } else 3300 return 0; 3301 3302 i915_gem_object_update_fence(obj, reg, enable); 3303 obj->fence_dirty = false; 3304 3305 return 0; 3306 } 3307 3308 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3309 struct drm_mm_node *gtt_space, 3310 unsigned long cache_level) 3311 { 3312 struct drm_mm_node *other; 3313 3314 /* On non-LLC machines we have to be careful when putting differing 3315 * types of snoopable memory together to avoid the prefetcher 3316 * crossing memory domains and dieing. 3317 */ 3318 if (HAS_LLC(dev)) 3319 return true; 3320 3321 if (gtt_space == NULL) 3322 return true; 3323 3324 if (list_empty(>t_space->node_list)) 3325 return true; 3326 3327 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3328 if (other->allocated && !other->hole_follows && other->color != cache_level) 3329 return false; 3330 3331 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3332 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3333 return false; 3334 3335 return true; 3336 } 3337 3338 static void i915_gem_verify_gtt(struct drm_device *dev) 3339 { 3340 #if WATCH_GTT 3341 struct drm_i915_private *dev_priv = dev->dev_private; 3342 struct drm_i915_gem_object *obj; 3343 int err = 0; 3344 3345 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { 3346 if (obj->gtt_space == NULL) { 3347 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3348 err++; 3349 continue; 3350 } 3351 3352 if (obj->cache_level != obj->gtt_space->color) { 3353 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3354 obj->gtt_space->start, 3355 obj->gtt_space->start + obj->gtt_space->size, 3356 obj->cache_level, 3357 obj->gtt_space->color); 3358 err++; 3359 continue; 3360 } 3361 3362 if (!i915_gem_valid_gtt_space(dev, 3363 obj->gtt_space, 3364 obj->cache_level)) { 3365 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3366 obj->gtt_space->start, 3367 obj->gtt_space->start + obj->gtt_space->size, 3368 obj->cache_level); 3369 err++; 3370 continue; 3371 } 3372 } 3373 3374 WARN_ON(err); 3375 #endif 3376 } 3377 3378 /** 3379 * Finds free space in the GTT aperture and binds the object there. 
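 *
 * An alignment of 0 selects the fence alignment when map_and_fenceable
 * is requested and the unfenced alignment otherwise; map_and_fenceable
 * additionally limits the search to the CPU-mappable end of the
 * aperture.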
3380 */ 3381 static int 3382 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 3383 unsigned alignment, 3384 bool map_and_fenceable, 3385 bool nonblocking) 3386 { 3387 struct drm_device *dev = obj->base.dev; 3388 drm_i915_private_t *dev_priv = dev->dev_private; 3389 struct drm_mm_node *node; 3390 u32 size, fence_size, fence_alignment, unfenced_alignment; 3391 bool mappable, fenceable; 3392 int ret; 3393 3394 if (obj->madv != I915_MADV_WILLNEED) { 3395 DRM_ERROR("Attempting to bind a purgeable object\n"); 3396 return -EINVAL; 3397 } 3398 3399 fence_size = i915_gem_get_gtt_size(dev, 3400 obj->base.size, 3401 obj->tiling_mode); 3402 fence_alignment = i915_gem_get_gtt_alignment(dev, 3403 obj->base.size, 3404 obj->tiling_mode); 3405 unfenced_alignment = 3406 i915_gem_get_unfenced_gtt_alignment(dev, 3407 obj->base.size, 3408 obj->tiling_mode); 3409 3410 if (alignment == 0) 3411 alignment = map_and_fenceable ? fence_alignment : 3412 unfenced_alignment; 3413 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 3414 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 3415 return -EINVAL; 3416 } 3417 3418 size = map_and_fenceable ? fence_size : obj->base.size; 3419 3420 /* If the object is bigger than the entire aperture, reject it early 3421 * before evicting everything in a vain attempt to find space. 3422 */ 3423 if (obj->base.size > 3424 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 3425 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 3426 return -E2BIG; 3427 } 3428 3429 ret = i915_gem_object_get_pages(obj); 3430 if (ret) 3431 return ret; 3432 3433 i915_gem_object_pin_pages(obj); 3434 3435 node = kzalloc(sizeof(*node), GFP_KERNEL); 3436 if (node == NULL) { 3437 i915_gem_object_unpin_pages(obj); 3438 return -ENOMEM; 3439 } 3440 3441 search_free: 3442 if (map_and_fenceable) 3443 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, 3444 size, alignment, obj->cache_level, 3445 0, dev_priv->mm.gtt_mappable_end); 3446 else 3447 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, 3448 size, alignment, obj->cache_level); 3449 if (ret) { 3450 ret = i915_gem_evict_something(dev, size, alignment, 3451 obj->cache_level, 3452 map_and_fenceable, 3453 nonblocking); 3454 if (ret == 0) 3455 goto search_free; 3456 3457 i915_gem_object_unpin_pages(obj); 3458 kfree(node); 3459 return ret; 3460 } 3461 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { 3462 i915_gem_object_unpin_pages(obj); 3463 drm_mm_put_block(node); 3464 return -EINVAL; 3465 } 3466 3467 ret = i915_gem_gtt_prepare_object(obj); 3468 if (ret) { 3469 i915_gem_object_unpin_pages(obj); 3470 drm_mm_put_block(node); 3471 return ret; 3472 } 3473 3474 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 3475 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3476 3477 obj->gtt_space = node; 3478 obj->gtt_offset = node->start; 3479 3480 fenceable = 3481 node->size == fence_size && 3482 (node->start & (fence_alignment - 1)) == 0; 3483 3484 mappable = 3485 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 3486 3487 obj->map_and_fenceable = mappable && fenceable; 3488 3489 i915_gem_object_unpin_pages(obj); 3490 trace_i915_gem_object_bind(obj, map_and_fenceable); 3491 i915_gem_verify_gtt(dev); 3492 return 0; 3493 } 3494 3495 void 3496 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 3497 { 3498 /* If we don't have a page list set up, then we're not pinned 3499 * to GPU, and we can 
ignore the cache flush because it'll happen 3500 * again at bind time. 3501 */ 3502 if (obj->pages == NULL) 3503 return; 3504 3505 /* If the GPU is snooping the contents of the CPU cache, 3506 * we do not need to manually clear the CPU cache lines. However, 3507 * the caches are only snooped when the render cache is 3508 * flushed/invalidated. As we always have to emit invalidations 3509 * and flushes when moving into and out of the RENDER domain, correct 3510 * snooping behaviour occurs naturally as the result of our domain 3511 * tracking. 3512 */ 3513 if (obj->cache_level != I915_CACHE_NONE) 3514 return; 3515 3516 trace_i915_gem_object_clflush(obj); 3517 3518 #ifdef __NetBSD__ 3519 drm_clflush_pglist(&obj->igo_pageq); 3520 #else 3521 drm_clflush_sg(obj->pages); 3522 #endif 3523 } 3524 3525 /** Flushes the GTT write domain for the object if it's dirty. */ 3526 static void 3527 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3528 { 3529 uint32_t old_write_domain; 3530 3531 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3532 return; 3533 3534 /* No actual flushing is required for the GTT write domain. Writes 3535 * to it immediately go to main memory as far as we know, so there's 3536 * no chipset flush. It also doesn't land in render cache. 3537 * 3538 * However, we do have to enforce the order so that all writes through 3539 * the GTT land before any writes to the device, such as updates to 3540 * the GATT itself. 3541 */ 3542 wmb(); 3543 3544 old_write_domain = obj->base.write_domain; 3545 obj->base.write_domain = 0; 3546 3547 trace_i915_gem_object_change_domain(obj, 3548 obj->base.read_domains, 3549 old_write_domain); 3550 } 3551 3552 /** Flushes the CPU write domain for the object if it's dirty. */ 3553 static void 3554 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3555 { 3556 uint32_t old_write_domain; 3557 3558 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3559 return; 3560 3561 i915_gem_clflush_object(obj); 3562 i915_gem_chipset_flush(obj->base.dev); 3563 old_write_domain = obj->base.write_domain; 3564 obj->base.write_domain = 0; 3565 3566 trace_i915_gem_object_change_domain(obj, 3567 obj->base.read_domains, 3568 old_write_domain); 3569 } 3570 3571 /** 3572 * Moves a single object to the GTT read, and possibly write domain. 3573 * 3574 * This function returns when the move is complete, including waiting on 3575 * flushes to occur. 3576 */ 3577 int 3578 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3579 { 3580 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3581 uint32_t old_write_domain, old_read_domains; 3582 int ret; 3583 3584 /* Not valid to be called on unbound objects. */ 3585 if (obj->gtt_space == NULL) 3586 return -EINVAL; 3587 3588 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3589 return 0; 3590 3591 ret = i915_gem_object_wait_rendering(obj, !write); 3592 if (ret) 3593 return ret; 3594 3595 i915_gem_object_flush_cpu_write_domain(obj); 3596 3597 old_write_domain = obj->base.write_domain; 3598 old_read_domains = obj->base.read_domains; 3599 3600 /* It should now be out of any other write domains, and we can update 3601 * the domain values for our changes. 
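 * Writes through the GTT also mark the object dirty so that its pages
 * are written back, rather than discarded, when they are eventually
 * released.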
3602 */ 3603 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3604 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3605 if (write) { 3606 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3607 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3608 obj->dirty = 1; 3609 } 3610 3611 trace_i915_gem_object_change_domain(obj, 3612 old_read_domains, 3613 old_write_domain); 3614 3615 /* And bump the LRU for this access */ 3616 if (i915_gem_object_is_inactive(obj)) 3617 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3618 3619 return 0; 3620 } 3621 3622 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3623 enum i915_cache_level cache_level) 3624 { 3625 struct drm_device *dev = obj->base.dev; 3626 drm_i915_private_t *dev_priv = dev->dev_private; 3627 int ret; 3628 3629 if (obj->cache_level == cache_level) 3630 return 0; 3631 3632 if (obj->pin_count) { 3633 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3634 return -EBUSY; 3635 } 3636 3637 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 3638 ret = i915_gem_object_unbind(obj); 3639 if (ret) 3640 return ret; 3641 } 3642 3643 if (obj->gtt_space) { 3644 ret = i915_gem_object_finish_gpu(obj); 3645 if (ret) 3646 return ret; 3647 3648 i915_gem_object_finish_gtt(obj); 3649 3650 /* Before SandyBridge, you could not use tiling or fence 3651 * registers with snooped memory, so relinquish any fences 3652 * currently pointing to our region in the aperture. 3653 */ 3654 if (INTEL_INFO(dev)->gen < 6) { 3655 ret = i915_gem_object_put_fence(obj); 3656 if (ret) 3657 return ret; 3658 } 3659 3660 if (obj->has_global_gtt_mapping) 3661 i915_gem_gtt_bind_object(obj, cache_level); 3662 if (obj->has_aliasing_ppgtt_mapping) 3663 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3664 obj, cache_level); 3665 3666 obj->gtt_space->color = cache_level; 3667 } 3668 3669 if (cache_level == I915_CACHE_NONE) { 3670 u32 old_read_domains, old_write_domain; 3671 3672 /* If we're coming from LLC cached, then we haven't 3673 * actually been tracking whether the data is in the 3674 * CPU cache or not, since we only allow one bit set 3675 * in obj->write_domain and have been skipping the clflushes. 3676 * Just set it to the CPU cache for now. 
3677 */ 3678 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3679 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3680 3681 old_read_domains = obj->base.read_domains; 3682 old_write_domain = obj->base.write_domain; 3683 3684 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3685 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3686 3687 trace_i915_gem_object_change_domain(obj, 3688 old_read_domains, 3689 old_write_domain); 3690 } 3691 3692 obj->cache_level = cache_level; 3693 i915_gem_verify_gtt(dev); 3694 return 0; 3695 } 3696 3697 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3698 struct drm_file *file) 3699 { 3700 struct drm_i915_gem_caching *args = data; 3701 struct drm_i915_gem_object *obj; 3702 int ret; 3703 3704 ret = i915_mutex_lock_interruptible(dev); 3705 if (ret) 3706 return ret; 3707 3708 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3709 if (&obj->base == NULL) { 3710 ret = -ENOENT; 3711 goto unlock; 3712 } 3713 3714 args->caching = obj->cache_level != I915_CACHE_NONE; 3715 3716 drm_gem_object_unreference(&obj->base); 3717 unlock: 3718 mutex_unlock(&dev->struct_mutex); 3719 return ret; 3720 } 3721 3722 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3723 struct drm_file *file) 3724 { 3725 struct drm_i915_gem_caching *args = data; 3726 struct drm_i915_gem_object *obj; 3727 enum i915_cache_level level; 3728 int ret; 3729 3730 switch (args->caching) { 3731 case I915_CACHING_NONE: 3732 level = I915_CACHE_NONE; 3733 break; 3734 case I915_CACHING_CACHED: 3735 level = I915_CACHE_LLC; 3736 break; 3737 default: 3738 return -EINVAL; 3739 } 3740 3741 ret = i915_mutex_lock_interruptible(dev); 3742 if (ret) 3743 return ret; 3744 3745 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3746 if (&obj->base == NULL) { 3747 ret = -ENOENT; 3748 goto unlock; 3749 } 3750 3751 ret = i915_gem_object_set_cache_level(obj, level); 3752 3753 drm_gem_object_unreference(&obj->base); 3754 unlock: 3755 mutex_unlock(&dev->struct_mutex); 3756 return ret; 3757 } 3758 3759 /* 3760 * Prepare buffer for display plane (scanout, cursors, etc). 3761 * Can be called from an uninterruptible phase (modesetting) and allows 3762 * any flushes to be pipelined (for pageflips). 3763 */ 3764 int 3765 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3766 u32 alignment, 3767 struct intel_ring_buffer *pipelined) 3768 { 3769 u32 old_read_domains, old_write_domain; 3770 int ret; 3771 3772 if (pipelined != obj->ring) { 3773 ret = i915_gem_object_sync(obj, pipelined); 3774 if (ret) 3775 return ret; 3776 } 3777 3778 /* The display engine is not coherent with the LLC cache on gen6. As 3779 * a result, we make sure that the pinning that is about to occur is 3780 * done with uncached PTEs. This is lowest common denominator for all 3781 * chipsets. 3782 * 3783 * However for gen6+, we could do better by using the GFDT bit instead 3784 * of uncaching, which would allow us to flush all the LLC-cached data 3785 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3786 */ 3787 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3788 if (ret) 3789 return ret; 3790 3791 /* As the user may map the buffer once pinned in the display plane 3792 * (e.g. libkms for the bootup splash), we have to ensure that we 3793 * always use map_and_fenceable for all scanout buffers. 
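 * (Hence the pin below always passes map_and_fenceable == true.)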
3794 */ 3795 ret = i915_gem_object_pin(obj, alignment, true, false); 3796 if (ret) 3797 return ret; 3798 3799 i915_gem_object_flush_cpu_write_domain(obj); 3800 3801 old_write_domain = obj->base.write_domain; 3802 old_read_domains = obj->base.read_domains; 3803 3804 /* It should now be out of any other write domains, and we can update 3805 * the domain values for our changes. 3806 */ 3807 obj->base.write_domain = 0; 3808 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3809 3810 trace_i915_gem_object_change_domain(obj, 3811 old_read_domains, 3812 old_write_domain); 3813 3814 return 0; 3815 } 3816 3817 int 3818 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3819 { 3820 int ret; 3821 3822 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3823 return 0; 3824 3825 ret = i915_gem_object_wait_rendering(obj, false); 3826 if (ret) 3827 return ret; 3828 3829 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3830 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3831 return 0; 3832 } 3833 3834 /** 3835 * Moves a single object to the CPU read, and possibly write domain. 3836 * 3837 * This function returns when the move is complete, including waiting on 3838 * flushes to occur. 3839 */ 3840 int 3841 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3842 { 3843 uint32_t old_write_domain, old_read_domains; 3844 int ret; 3845 3846 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3847 return 0; 3848 3849 ret = i915_gem_object_wait_rendering(obj, !write); 3850 if (ret) 3851 return ret; 3852 3853 i915_gem_object_flush_gtt_write_domain(obj); 3854 3855 old_write_domain = obj->base.write_domain; 3856 old_read_domains = obj->base.read_domains; 3857 3858 /* Flush the CPU cache if it's still invalid. */ 3859 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3860 i915_gem_clflush_object(obj); 3861 3862 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3863 } 3864 3865 /* It should now be out of any other write domains, and we can update 3866 * the domain values for our changes. 3867 */ 3868 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3869 3870 /* If we're writing through the CPU, then the GPU read domains will 3871 * need to be invalidated at next use. 3872 */ 3873 if (write) { 3874 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3875 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3876 } 3877 3878 trace_i915_gem_object_change_domain(obj, 3879 old_read_domains, 3880 old_write_domain); 3881 3882 return 0; 3883 } 3884 3885 /* Throttle our rendering by waiting until the ring has completed our requests 3886 * emitted over 20 msec ago. 3887 * 3888 * Note that if we were to use the current jiffies each time around the loop, 3889 * we wouldn't escape the function with any frames outstanding if the time to 3890 * render a frame was over 20ms. 3891 * 3892 * This should get us reasonable parallelism between CPU and GPU but also 3893 * relatively low latency when blocking on a particular request to finish. 
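 *
 * Concretely, we pick the most recent request emitted more than 20ms ago
 * and block until it has completed; anything younger is allowed to keep
 * running ahead of us.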
3894 */ 3895 static int 3896 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3897 { 3898 struct drm_i915_private *dev_priv = dev->dev_private; 3899 struct drm_i915_file_private *file_priv = file->driver_priv; 3900 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3901 struct drm_i915_gem_request *request; 3902 struct intel_ring_buffer *ring = NULL; 3903 u32 seqno = 0; 3904 int ret; 3905 3906 if (atomic_read(&dev_priv->mm.wedged)) 3907 return -EIO; 3908 3909 spin_lock(&file_priv->mm.lock); 3910 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3911 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3912 break; 3913 3914 ring = request->ring; 3915 seqno = request->seqno; 3916 } 3917 spin_unlock(&file_priv->mm.lock); 3918 3919 if (seqno == 0) 3920 return 0; 3921 3922 ret = __wait_seqno(ring, seqno, true, NULL); 3923 if (ret == 0) 3924 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3925 3926 return ret; 3927 } 3928 3929 int 3930 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3931 uint32_t alignment, 3932 bool map_and_fenceable, 3933 bool nonblocking) 3934 { 3935 int ret; 3936 3937 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3938 return -EBUSY; 3939 3940 if (obj->gtt_space != NULL) { 3941 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3942 (map_and_fenceable && !obj->map_and_fenceable)) { 3943 WARN(obj->pin_count, 3944 "bo is already pinned with incorrect alignment:" 3945 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3946 " obj->map_and_fenceable=%d\n", 3947 obj->gtt_offset, alignment, 3948 map_and_fenceable, 3949 obj->map_and_fenceable); 3950 ret = i915_gem_object_unbind(obj); 3951 if (ret) 3952 return ret; 3953 } 3954 } 3955 3956 if (obj->gtt_space == NULL) { 3957 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3958 3959 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3960 map_and_fenceable, 3961 nonblocking); 3962 if (ret) 3963 return ret; 3964 3965 if (!dev_priv->mm.aliasing_ppgtt) 3966 i915_gem_gtt_bind_object(obj, obj->cache_level); 3967 } 3968 3969 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3970 i915_gem_gtt_bind_object(obj, obj->cache_level); 3971 3972 obj->pin_count++; 3973 obj->pin_mappable |= map_and_fenceable; 3974 3975 return 0; 3976 } 3977 3978 void 3979 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3980 { 3981 BUG_ON(obj->pin_count == 0); 3982 BUG_ON(obj->gtt_space == NULL); 3983 3984 if (--obj->pin_count == 0) 3985 obj->pin_mappable = false; 3986 } 3987 3988 int 3989 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3990 struct drm_file *file) 3991 { 3992 struct drm_i915_gem_pin *args = data; 3993 struct drm_i915_gem_object *obj; 3994 int ret; 3995 3996 ret = i915_mutex_lock_interruptible(dev); 3997 if (ret) 3998 return ret; 3999 4000 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4001 if (&obj->base == NULL) { 4002 ret = -ENOENT; 4003 goto unlock; 4004 } 4005 4006 if (obj->madv != I915_MADV_WILLNEED) { 4007 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 4008 ret = -EINVAL; 4009 goto out; 4010 } 4011 4012 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4013 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4014 args->handle); 4015 ret = -EINVAL; 4016 goto out; 4017 } 4018 4019 if (obj->user_pin_count == 0) { 4020 ret = i915_gem_object_pin(obj, args->alignment, true, false); 4021 if (ret) 4022 goto out; 4023 } 4024 4025 obj->user_pin_count++; 4026 obj->pin_filp = 
file; 4027 4028 /* XXX - flush the CPU caches for pinned objects 4029 * as the X server doesn't manage domains yet 4030 */ 4031 i915_gem_object_flush_cpu_write_domain(obj); 4032 args->offset = obj->gtt_offset; 4033 out: 4034 drm_gem_object_unreference(&obj->base); 4035 unlock: 4036 mutex_unlock(&dev->struct_mutex); 4037 return ret; 4038 } 4039 4040 int 4041 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4042 struct drm_file *file) 4043 { 4044 struct drm_i915_gem_pin *args = data; 4045 struct drm_i915_gem_object *obj; 4046 int ret; 4047 4048 ret = i915_mutex_lock_interruptible(dev); 4049 if (ret) 4050 return ret; 4051 4052 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4053 if (&obj->base == NULL) { 4054 ret = -ENOENT; 4055 goto unlock; 4056 } 4057 4058 if (obj->pin_filp != file) { 4059 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4060 args->handle); 4061 ret = -EINVAL; 4062 goto out; 4063 } 4064 obj->user_pin_count--; 4065 if (obj->user_pin_count == 0) { 4066 obj->pin_filp = NULL; 4067 i915_gem_object_unpin(obj); 4068 } 4069 4070 out: 4071 drm_gem_object_unreference(&obj->base); 4072 unlock: 4073 mutex_unlock(&dev->struct_mutex); 4074 return ret; 4075 } 4076 4077 int 4078 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4079 struct drm_file *file) 4080 { 4081 struct drm_i915_gem_busy *args = data; 4082 struct drm_i915_gem_object *obj; 4083 int ret; 4084 4085 ret = i915_mutex_lock_interruptible(dev); 4086 if (ret) 4087 return ret; 4088 4089 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4090 if (&obj->base == NULL) { 4091 ret = -ENOENT; 4092 goto unlock; 4093 } 4094 4095 /* Count all active objects as busy, even if they are currently not used 4096 * by the gpu. Users of this interface expect objects to eventually 4097 * become non-busy without any further actions, therefore emit any 4098 * necessary flushes here.
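	 *
	 * The result is packed into args->busy: bit 0 reports whether the
	 * object is still active at all and, if so, the ring it was last
	 * submitted to is encoded in the upper half via the
	 * intel_ring_flag() shift below.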
4099 */ 4100 ret = i915_gem_object_flush_active(obj); 4101 4102 args->busy = obj->active; 4103 if (obj->ring) { 4104 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4105 args->busy |= intel_ring_flag(obj->ring) << 16; 4106 } 4107 4108 drm_gem_object_unreference(&obj->base); 4109 unlock: 4110 mutex_unlock(&dev->struct_mutex); 4111 return ret; 4112 } 4113 4114 int 4115 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4116 struct drm_file *file_priv) 4117 { 4118 return i915_gem_ring_throttle(dev, file_priv); 4119 } 4120 4121 int 4122 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4123 struct drm_file *file_priv) 4124 { 4125 struct drm_i915_gem_madvise *args = data; 4126 struct drm_i915_gem_object *obj; 4127 int ret; 4128 4129 switch (args->madv) { 4130 case I915_MADV_DONTNEED: 4131 case I915_MADV_WILLNEED: 4132 break; 4133 default: 4134 return -EINVAL; 4135 } 4136 4137 ret = i915_mutex_lock_interruptible(dev); 4138 if (ret) 4139 return ret; 4140 4141 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4142 if (&obj->base == NULL) { 4143 ret = -ENOENT; 4144 goto unlock; 4145 } 4146 4147 if (obj->pin_count) { 4148 ret = -EINVAL; 4149 goto out; 4150 } 4151 4152 if (obj->madv != __I915_MADV_PURGED) 4153 obj->madv = args->madv; 4154 4155 /* if the object is no longer attached, discard its backing storage */ 4156 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4157 i915_gem_object_truncate(obj); 4158 4159 args->retained = obj->madv != __I915_MADV_PURGED; 4160 4161 out: 4162 drm_gem_object_unreference(&obj->base); 4163 unlock: 4164 mutex_unlock(&dev->struct_mutex); 4165 return ret; 4166 } 4167 4168 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4169 const struct drm_i915_gem_object_ops *ops) 4170 { 4171 INIT_LIST_HEAD(&obj->mm_list); 4172 INIT_LIST_HEAD(&obj->gtt_list); 4173 INIT_LIST_HEAD(&obj->ring_list); 4174 INIT_LIST_HEAD(&obj->exec_list); 4175 4176 obj->ops = ops; 4177 4178 obj->fence_reg = I915_FENCE_REG_NONE; 4179 obj->madv = I915_MADV_WILLNEED; 4180 /* Avoid an unnecessary call to unbind on the first bind. */ 4181 obj->map_and_fenceable = true; 4182 4183 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4184 } 4185 4186 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4187 .get_pages = i915_gem_object_get_pages_gtt, 4188 .put_pages = i915_gem_object_put_pages_gtt, 4189 }; 4190 4191 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4192 size_t size) 4193 { 4194 struct drm_i915_gem_object *obj; 4195 #ifdef __NetBSD__ 4196 uint64_t maxaddr; 4197 #else 4198 struct address_space *mapping; 4199 u32 mask; 4200 #endif 4201 4202 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 4203 if (obj == NULL) 4204 return NULL; 4205 4206 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4207 kfree(obj); 4208 return NULL; 4209 } 4210 4211 #ifdef __NetBSD__ 4212 /* 4213 * 965GM can't handle >32-bit paddrs; all other models can't 4214 * handle >40-bit paddrs. 4215 * 4216 * XXX I think this table is incomplete. It should be 4217 * synchronized with the other DMA address constraints 4218 * scattered throughout DRM. 4219 * 4220 * XXX DMA limits 4221 */ 4222 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) 4223 maxaddr = 0xffffffffULL; 4224 else 4225 maxaddr = 0xffffffffffULL; 4226 uao_set_pgfl(obj->base.gemo_shm_uao, x86_select_freelist(maxaddr)); 4227 #else 4228 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4229 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4230 /* 965gm cannot relocate objects above 4GiB. 
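	 * Clearing __GFP_HIGHMEM and adding __GFP_DMA32 to the mapping's
	 * gfp mask below keeps the shmem backing pages in the low 4GiB of
	 * physical memory, where the GPU can address them.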
*/ 4231 mask &= ~__GFP_HIGHMEM; 4232 mask |= __GFP_DMA32; 4233 } 4234 4235 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4236 mapping_set_gfp_mask(mapping, mask); 4237 #endif 4238 4239 i915_gem_object_init(obj, &i915_gem_object_ops); 4240 4241 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4242 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4243 4244 if (HAS_LLC(dev)) { 4245 /* On some devices, we can have the GPU use the LLC (the CPU 4246 * cache) for about a 10% performance improvement 4247 * compared to uncached. Graphics requests other than 4248 * display scanout are coherent with the CPU in 4249 * accessing this cache. This means in this mode we 4250 * don't need to clflush on the CPU side, and on the 4251 * GPU side we only need to flush internal caches to 4252 * get data visible to the CPU. 4253 * 4254 * However, we maintain the display planes as UC, and so 4255 * need to rebind when first used as such. 4256 */ 4257 obj->cache_level = I915_CACHE_LLC; 4258 } else 4259 obj->cache_level = I915_CACHE_NONE; 4260 4261 return obj; 4262 } 4263 4264 int i915_gem_init_object(struct drm_gem_object *obj) 4265 { 4266 BUG(); 4267 4268 return 0; 4269 } 4270 4271 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4272 { 4273 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4274 struct drm_device *dev = obj->base.dev; 4275 drm_i915_private_t *dev_priv = dev->dev_private; 4276 4277 trace_i915_gem_object_destroy(obj); 4278 4279 if (obj->phys_obj) 4280 i915_gem_detach_phys_object(dev, obj); 4281 4282 obj->pin_count = 0; 4283 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 4284 bool was_interruptible; 4285 4286 was_interruptible = dev_priv->mm.interruptible; 4287 dev_priv->mm.interruptible = false; 4288 4289 WARN_ON(i915_gem_object_unbind(obj)); 4290 4291 dev_priv->mm.interruptible = was_interruptible; 4292 } 4293 4294 obj->pages_pin_count = 0; 4295 i915_gem_object_put_pages(obj); 4296 i915_gem_object_free_mmap_offset(obj); 4297 4298 BUG_ON(obj->pages); 4299 4300 #ifndef __NetBSD__ /* XXX drm prime */ 4301 if (obj->base.import_attach) 4302 drm_prime_gem_destroy(&obj->base, NULL); 4303 #endif 4304 4305 drm_gem_object_release(&obj->base); 4306 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4307 4308 kfree(obj->bit_17); 4309 kfree(obj); 4310 } 4311 4312 int 4313 i915_gem_idle(struct drm_device *dev) 4314 { 4315 drm_i915_private_t *dev_priv = dev->dev_private; 4316 int ret; 4317 4318 mutex_lock(&dev->struct_mutex); 4319 4320 if (dev_priv->mm.suspended) { 4321 mutex_unlock(&dev->struct_mutex); 4322 return 0; 4323 } 4324 4325 ret = i915_gpu_idle(dev); 4326 if (ret) { 4327 mutex_unlock(&dev->struct_mutex); 4328 return ret; 4329 } 4330 i915_gem_retire_requests(dev); 4331 4332 /* Under UMS, be paranoid and evict. */ 4333 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4334 i915_gem_evict_everything(dev); 4335 4336 i915_gem_reset_fences(dev); 4337 4338 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4339 * We need to replace this with a semaphore, or something. 4340 * And not confound mm.suspended! 4341 */ 4342 dev_priv->mm.suspended = 1; 4343 del_timer_sync(&dev_priv->hangcheck_timer); 4344 4345 i915_kernel_lost_context(dev); 4346 i915_gem_cleanup_ringbuffer(dev); 4347 4348 mutex_unlock(&dev->struct_mutex); 4349 4350 /* Cancel the retire work handler, which should be idle now. 
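	 * By this point the GPU has been idled, the requests retired and
	 * mm.suspended set, so the handler should find nothing left to do
	 * and will not re-queue itself.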
*/ 4351 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4352 4353 return 0; 4354 } 4355 4356 void i915_gem_l3_remap(struct drm_device *dev) 4357 { 4358 drm_i915_private_t *dev_priv = dev->dev_private; 4359 u32 misccpctl; 4360 int i; 4361 4362 if (!IS_IVYBRIDGE(dev)) 4363 return; 4364 4365 if (!dev_priv->l3_parity.remap_info) 4366 return; 4367 4368 misccpctl = I915_READ(GEN7_MISCCPCTL); 4369 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 4370 POSTING_READ(GEN7_MISCCPCTL); 4371 4372 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4373 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 4374 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 4375 DRM_DEBUG("0x%x was already programmed to %x\n", 4376 GEN7_L3LOG_BASE + i, remap); 4377 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 4378 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 4379 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 4380 } 4381 4382 /* Make sure all the writes land before disabling dop clock gating */ 4383 POSTING_READ(GEN7_L3LOG_BASE); 4384 4385 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 4386 } 4387 4388 void i915_gem_init_swizzling(struct drm_device *dev) 4389 { 4390 drm_i915_private_t *dev_priv = dev->dev_private; 4391 4392 if (INTEL_INFO(dev)->gen < 5 || 4393 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4394 return; 4395 4396 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4397 DISP_TILE_SURFACE_SWIZZLING); 4398 4399 if (IS_GEN5(dev)) 4400 return; 4401 4402 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4403 if (IS_GEN6(dev)) 4404 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4405 else 4406 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4407 } 4408 4409 static bool 4410 intel_enable_blt(struct drm_device *dev) 4411 { 4412 if (!HAS_BLT(dev)) 4413 return false; 4414 4415 /* The blitter was dysfunctional on early prototypes */ 4416 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4417 DRM_INFO("BLT not supported on this pre-production hardware;" 4418 " graphics performance will be degraded.\n"); 4419 return false; 4420 } 4421 4422 return true; 4423 } 4424 4425 int 4426 i915_gem_init_hw(struct drm_device *dev) 4427 { 4428 drm_i915_private_t *dev_priv = dev->dev_private; 4429 int ret; 4430 4431 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4432 return -EIO; 4433 4434 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 4435 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 4436 4437 i915_gem_l3_remap(dev); 4438 4439 i915_gem_init_swizzling(dev); 4440 4441 ret = intel_init_render_ring_buffer(dev); 4442 if (ret) 4443 return ret; 4444 4445 if (HAS_BSD(dev)) { 4446 ret = intel_init_bsd_ring_buffer(dev); 4447 if (ret) 4448 goto cleanup_render_ring; 4449 } 4450 4451 if (intel_enable_blt(dev)) { 4452 ret = intel_init_blt_ring_buffer(dev); 4453 if (ret) 4454 goto cleanup_bsd_ring; 4455 } 4456 4457 dev_priv->next_seqno = 1; 4458 4459 /* 4460 * XXX: There was some w/a described somewhere suggesting loading 4461 * contexts before PPGTT. 
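	 * The order used below (i915_gem_context_init() before
	 * i915_gem_init_ppgtt()) follows that suggestion.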
4462 */ 4463 i915_gem_context_init(dev); 4464 i915_gem_init_ppgtt(dev); 4465 4466 return 0; 4467 4468 cleanup_bsd_ring: 4469 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4470 cleanup_render_ring: 4471 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4472 return ret; 4473 } 4474 4475 static bool 4476 intel_enable_ppgtt(struct drm_device *dev) 4477 { 4478 #ifdef __NetBSD__ /* XXX ppgtt */ 4479 return false; 4480 #else 4481 if (i915_enable_ppgtt >= 0) 4482 return i915_enable_ppgtt; 4483 4484 #ifdef CONFIG_INTEL_IOMMU 4485 /* Disable ppgtt on SNB if VT-d is on. */ 4486 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) 4487 return false; 4488 #endif 4489 4490 return true; 4491 #endif 4492 } 4493 4494 int i915_gem_init(struct drm_device *dev) 4495 { 4496 struct drm_i915_private *dev_priv = dev->dev_private; 4497 unsigned long gtt_size, mappable_size; 4498 int ret; 4499 4500 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 4501 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 4502 4503 mutex_lock(&dev->struct_mutex); 4504 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 4505 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 4506 * aperture accordingly when using aliasing ppgtt. */ 4507 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 4508 4509 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); 4510 4511 ret = i915_gem_init_aliasing_ppgtt(dev); 4512 if (ret) { 4513 i915_gem_fini_global_gtt(dev); 4514 mutex_unlock(&dev->struct_mutex); 4515 return ret; 4516 } 4517 } else { 4518 /* Let GEM Manage all of the aperture. 4519 * 4520 * However, leave one page at the end still bound to the scratch 4521 * page. There are a number of places where the hardware 4522 * apparently prefetches past the end of the object, and we've 4523 * seen multiple hangs with the GPU head pointer stuck in a 4524 * batchbuffer bound at the last page of the aperture. One page 4525 * should be enough to keep any prefetching inside of the 4526 * aperture. 4527 */ 4528 i915_gem_init_global_gtt(dev, 0, mappable_size, 4529 gtt_size); 4530 } 4531 4532 ret = i915_gem_init_hw(dev); 4533 #ifdef __NetBSD__ /* XXX fini global gtt */ 4534 if (ret) 4535 i915_gem_fini_global_gtt(dev); 4536 #endif 4537 mutex_unlock(&dev->struct_mutex); 4538 if (ret) { 4539 i915_gem_cleanup_aliasing_ppgtt(dev); 4540 return ret; 4541 } 4542 4543 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
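	 * The flag is only consulted by the legacy DRI1 batchbuffer path,
	 * which KMS configurations reject outright.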
*/ 4544 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4545 dev_priv->dri1.allow_batchbuffer = 1; 4546 return 0; 4547 } 4548 4549 void 4550 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4551 { 4552 drm_i915_private_t *dev_priv = dev->dev_private; 4553 struct intel_ring_buffer *ring; 4554 int i; 4555 4556 for_each_ring(ring, dev_priv, i) 4557 intel_cleanup_ring_buffer(ring); 4558 } 4559 4560 int 4561 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4562 struct drm_file *file_priv) 4563 { 4564 drm_i915_private_t *dev_priv = dev->dev_private; 4565 int ret; 4566 4567 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4568 return 0; 4569 4570 if (atomic_read(&dev_priv->mm.wedged)) { 4571 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4572 atomic_set(&dev_priv->mm.wedged, 0); 4573 } 4574 4575 mutex_lock(&dev->struct_mutex); 4576 dev_priv->mm.suspended = 0; 4577 4578 ret = i915_gem_init_hw(dev); 4579 if (ret != 0) { 4580 mutex_unlock(&dev->struct_mutex); 4581 return ret; 4582 } 4583 4584 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 4585 mutex_unlock(&dev->struct_mutex); 4586 4587 ret = drm_irq_install(dev); 4588 if (ret) 4589 goto cleanup_ringbuffer; 4590 4591 return 0; 4592 4593 cleanup_ringbuffer: 4594 mutex_lock(&dev->struct_mutex); 4595 i915_gem_cleanup_ringbuffer(dev); 4596 dev_priv->mm.suspended = 1; 4597 mutex_unlock(&dev->struct_mutex); 4598 4599 return ret; 4600 } 4601 4602 int 4603 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4604 struct drm_file *file_priv) 4605 { 4606 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4607 return 0; 4608 4609 drm_irq_uninstall(dev); 4610 return i915_gem_idle(dev); 4611 } 4612 4613 void 4614 i915_gem_lastclose(struct drm_device *dev) 4615 { 4616 int ret; 4617 4618 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4619 return; 4620 4621 ret = i915_gem_idle(dev); 4622 if (ret) 4623 DRM_ERROR("failed to idle hardware: %d\n", ret); 4624 } 4625 4626 static void 4627 init_ring_lists(struct intel_ring_buffer *ring) 4628 { 4629 INIT_LIST_HEAD(&ring->active_list); 4630 INIT_LIST_HEAD(&ring->request_list); 4631 } 4632 4633 void 4634 i915_gem_load(struct drm_device *dev) 4635 { 4636 int i; 4637 drm_i915_private_t *dev_priv = dev->dev_private; 4638 4639 INIT_LIST_HEAD(&dev_priv->mm.active_list); 4640 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4641 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4642 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4643 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4644 for (i = 0; i < I915_NUM_RINGS; i++) 4645 init_ring_lists(&dev_priv->ring[i]); 4646 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4647 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4648 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4649 i915_gem_retire_work_handler); 4650 init_completion(&dev_priv->error_completion); 4651 4652 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 4653 if (IS_GEN3(dev)) { 4654 I915_WRITE(MI_ARB_STATE, 4655 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 4656 } 4657 4658 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4659 4660 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4661 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4662 dev_priv->fence_reg_start = 3; 4663 4664 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4665 dev_priv->num_fence_regs = 16; 4666 else 4667 dev_priv->num_fence_regs = 8; 4668 4669 /* Initialize fence registers to zero */ 4670 i915_gem_reset_fences(dev); 4671 4672 i915_gem_detect_bit_6_swizzle(dev); 
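	/*
	 * The pending-flip queue is a plain waitqueue on Linux; the NetBSD
	 * port pairs its DRM waitqueue with a dedicated spin lock.
	 */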
4673 #ifdef __NetBSD__ 4674 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip"); 4675 spin_lock_init(&dev_priv->pending_flip_lock); 4676 #else 4677 init_waitqueue_head(&dev_priv->pending_flip_queue); 4678 #endif 4679 4680 dev_priv->mm.interruptible = true; 4681 4682 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4683 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4684 register_shrinker(&dev_priv->mm.inactive_shrinker); 4685 } 4686 4687 /* 4688 * Create a physically contiguous memory object for this object 4689 * e.g. for cursor + overlay regs 4690 */ 4691 static int i915_gem_init_phys_object(struct drm_device *dev, 4692 int id, int size, int align) 4693 { 4694 drm_i915_private_t *dev_priv = dev->dev_private; 4695 struct drm_i915_gem_phys_object *phys_obj; 4696 int ret; 4697 4698 if (dev_priv->mm.phys_objs[id - 1] || !size) 4699 return 0; 4700 4701 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4702 if (!phys_obj) 4703 return -ENOMEM; 4704 4705 phys_obj->id = id; 4706 4707 phys_obj->handle = drm_pci_alloc(dev, size, align); 4708 if (!phys_obj->handle) { 4709 ret = -ENOMEM; 4710 goto kfree_obj; 4711 } 4712 #ifndef __NetBSD__ /* XXX x86 wc? */ 4713 #ifdef CONFIG_X86 4714 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4715 #endif 4716 #endif 4717 4718 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4719 4720 return 0; 4721 kfree_obj: 4722 kfree(phys_obj); 4723 return ret; 4724 } 4725 4726 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4727 { 4728 drm_i915_private_t *dev_priv = dev->dev_private; 4729 struct drm_i915_gem_phys_object *phys_obj; 4730 4731 if (!dev_priv->mm.phys_objs[id - 1]) 4732 return; 4733 4734 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4735 if (phys_obj->cur_obj) { 4736 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4737 } 4738 4739 #ifndef __NetBSD__ /* XXX x86 wb? */ 4740 #ifdef CONFIG_X86 4741 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4742 #endif 4743 #endif 4744 drm_pci_free(dev, phys_obj->handle); 4745 kfree(phys_obj); 4746 dev_priv->mm.phys_objs[id - 1] = NULL; 4747 } 4748 4749 void i915_gem_free_all_phys_object(struct drm_device *dev) 4750 { 4751 int i; 4752 4753 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4754 i915_gem_free_phys_object(dev, i); 4755 } 4756 4757 void i915_gem_detach_phys_object(struct drm_device *dev, 4758 struct drm_i915_gem_object *obj) 4759 { 4760 #ifndef __NetBSD__ 4761 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4762 #endif 4763 char *vaddr; 4764 int i; 4765 int page_count; 4766 4767 if (!obj->phys_obj) 4768 return; 4769 vaddr = obj->phys_obj->handle->vaddr; 4770 4771 page_count = obj->base.size / PAGE_SIZE; 4772 for (i = 0; i < page_count; i++) { 4773 #ifdef __NetBSD__ 4774 /* XXX Just use ubc_uiomove? 
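	 *
	 * For now, transiently wire each page of the backing uvm object,
	 * copy that page's worth of data out of the phys object's buffer,
	 * flush the CPU cache and mark the page dirty, then unwire it
	 * again.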
*/ 4775 struct pglist pages; 4776 int error; 4777 4778 TAILQ_INIT(&pages); 4779 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4780 (i+1)*PAGE_SIZE, &pages); 4781 if (error) { 4782 printf("unable to map page %d of i915 gem obj: %d\n", 4783 i, error); 4784 continue; 4785 } 4786 4787 KASSERT(!TAILQ_EMPTY(&pages)); 4788 struct vm_page *const page = TAILQ_FIRST(&pages); 4789 TAILQ_REMOVE(&pages, page, pageq.queue); 4790 KASSERT(TAILQ_EMPTY(&pages)); 4791 4792 char *const dst = kmap_atomic(container_of(page, struct page, 4793 p_vmp)); 4794 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE); 4795 kunmap_atomic(dst); 4796 4797 drm_clflush_page(container_of(page, struct page, p_vmp)); 4798 page->flags &= ~PG_CLEAN; 4799 /* XXX mark page accessed */ 4800 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4801 (i+1)*PAGE_SIZE); 4802 #else 4803 struct page *page = shmem_read_mapping_page(mapping, i); 4804 if (!IS_ERR(page)) { 4805 char *dst = kmap_atomic(page); 4806 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4807 kunmap_atomic(dst); 4808 4809 drm_clflush_pages(&page, 1); 4810 4811 set_page_dirty(page); 4812 mark_page_accessed(page); 4813 page_cache_release(page); 4814 } 4815 #endif 4816 } 4817 i915_gem_chipset_flush(dev); 4818 4819 obj->phys_obj->cur_obj = NULL; 4820 obj->phys_obj = NULL; 4821 } 4822 4823 int 4824 i915_gem_attach_phys_object(struct drm_device *dev, 4825 struct drm_i915_gem_object *obj, 4826 int id, 4827 int align) 4828 { 4829 #ifndef __NetBSD__ 4830 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4831 #endif 4832 drm_i915_private_t *dev_priv = dev->dev_private; 4833 int ret = 0; 4834 int page_count; 4835 int i; 4836 4837 if (id > I915_MAX_PHYS_OBJECT) 4838 return -EINVAL; 4839 4840 if (obj->phys_obj) { 4841 if (obj->phys_obj->id == id) 4842 return 0; 4843 i915_gem_detach_phys_object(dev, obj); 4844 } 4845 4846 /* create a new object */ 4847 if (!dev_priv->mm.phys_objs[id - 1]) { 4848 ret = i915_gem_init_phys_object(dev, id, 4849 obj->base.size, align); 4850 if (ret) { 4851 DRM_ERROR("failed to init phys object %d size: %zu\n", 4852 id, obj->base.size); 4853 return ret; 4854 } 4855 } 4856 4857 /* bind to the object */ 4858 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4859 obj->phys_obj->cur_obj = obj; 4860 4861 page_count = obj->base.size / PAGE_SIZE; 4862 4863 for (i = 0; i < page_count; i++) { 4864 #ifdef __NetBSD__ 4865 char *const vaddr = obj->phys_obj->handle->vaddr; 4866 struct pglist pages; 4867 int error; 4868 4869 TAILQ_INIT(&pages); 4870 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4871 (i+1)*PAGE_SIZE, &pages); 4872 if (error) 4873 /* XXX errno NetBSD->Linux */ 4874 return -error; 4875 4876 KASSERT(!TAILQ_EMPTY(&pages)); 4877 struct vm_page *const page = TAILQ_FIRST(&pages); 4878 TAILQ_REMOVE(&pages, page, pageq.queue); 4879 KASSERT(TAILQ_EMPTY(&pages)); 4880 4881 char *const src = kmap_atomic(container_of(page, struct page, 4882 p_vmp)); 4883 (void)memcpy(vaddr + (i*PAGE_SIZE), src, PAGE_SIZE); 4884 kunmap_atomic(src); 4885 4886 /* XXX mark page accessed */ 4887 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4888 (i+1)*PAGE_SIZE); 4889 #else 4890 struct page *page; 4891 char *dst, *src; 4892 4893 page = shmem_read_mapping_page(mapping, i); 4894 if (IS_ERR(page)) 4895 return PTR_ERR(page); 4896 4897 src = kmap_atomic(page); 4898 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4899 memcpy(dst, src, PAGE_SIZE); 4900 kunmap_atomic(src); 4901 4902 mark_page_accessed(page); 4903 
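		/* Drop the reference shmem_read_mapping_page() gave us. */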
page_cache_release(page); 4904 #endif 4905 } 4906 4907 return 0; 4908 } 4909 4910 static int 4911 i915_gem_phys_pwrite(struct drm_device *dev, 4912 struct drm_i915_gem_object *obj, 4913 struct drm_i915_gem_pwrite *args, 4914 struct drm_file *file_priv) 4915 { 4916 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 4917 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4918 4919 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4920 unsigned long unwritten; 4921 4922 /* The physical object once assigned is fixed for the lifetime 4923 * of the obj, so we can safely drop the lock and continue 4924 * to access vaddr. 4925 */ 4926 mutex_unlock(&dev->struct_mutex); 4927 unwritten = copy_from_user(vaddr, user_data, args->size); 4928 mutex_lock(&dev->struct_mutex); 4929 if (unwritten) 4930 return -EFAULT; 4931 } 4932 4933 i915_gem_chipset_flush(dev); 4934 return 0; 4935 } 4936 4937 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4938 { 4939 struct drm_i915_file_private *file_priv = file->driver_priv; 4940 4941 /* Clean up our request list when the client is going away, so that 4942 * later retire_requests won't dereference our soon-to-be-gone 4943 * file_priv. 4944 */ 4945 spin_lock(&file_priv->mm.lock); 4946 while (!list_empty(&file_priv->mm.request_list)) { 4947 struct drm_i915_gem_request *request; 4948 4949 request = list_first_entry(&file_priv->mm.request_list, 4950 struct drm_i915_gem_request, 4951 client_list); 4952 list_del(&request->client_list); 4953 request->file_priv = NULL; 4954 } 4955 spin_unlock(&file_priv->mm.lock); 4956 } 4957 4958 #ifndef __NetBSD__ /* XXX */ 4959 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 4960 { 4961 if (!mutex_is_locked(mutex)) 4962 return false; 4963 4964 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 4965 return mutex->owner == task; 4966 #else 4967 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 4968 return false; 4969 #endif 4970 } 4971 #endif 4972 4973 static int 4974 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4975 { 4976 #ifdef __NetBSD__ /* XXX shrinkers */ 4977 return 0; 4978 #else 4979 struct drm_i915_private *dev_priv = 4980 container_of(shrinker, 4981 struct drm_i915_private, 4982 mm.inactive_shrinker); 4983 struct drm_device *dev = dev_priv->dev; 4984 struct drm_i915_gem_object *obj; 4985 int nr_to_scan = sc->nr_to_scan; 4986 bool unlock = true; 4987 int cnt; 4988 4989 if (!mutex_trylock(&dev->struct_mutex)) { 4990 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 4991 return 0; 4992 4993 if (dev_priv->mm.shrinker_no_lock_stealing) 4994 return 0; 4995 4996 unlock = false; 4997 } 4998 4999 if (nr_to_scan) { 5000 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); 5001 if (nr_to_scan > 0) 5002 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, 5003 false); 5004 if (nr_to_scan > 0) 5005 i915_gem_shrink_all(dev_priv); 5006 } 5007 5008 cnt = 0; 5009 list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list) 5010 if (obj->pages_pin_count == 0) 5011 cnt += obj->base.size >> PAGE_SHIFT; 5012 list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list) 5013 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 5014 cnt += obj->base.size >> PAGE_SHIFT; 5015 5016 if (unlock) 5017 mutex_unlock(&dev->struct_mutex); 5018 return cnt; 5019 #endif 5020 } 5021
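
/*
 * Illustrative sketch, not driver code: the madvise ioctl above is how
 * userspace marks buffers that this shrinker (via i915_gem_purge()) may
 * discard.  A client might mark an idle buffer purgeable and later check
 * whether its pages survived, roughly as follows; drmIoctl(), fd and
 * bo_handle are assumed libdrm-style names, not defined in this file:
 *
 *	struct drm_i915_gem_madvise madv = {
 *		.handle = bo_handle,
 *		.madv = I915_MADV_DONTNEED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *
 *	... later, before reusing the buffer ...
 *
 *	madv.madv = I915_MADV_WILLNEED;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
 *	if (!madv.retained) {
 *		... the backing store was discarded; reupload contents ...
 *	}
 */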