/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#ifdef __NetBSD__
#if 0	/* XXX uvmhist option? */
#include "opt_uvmhist.h"
#endif

#include <sys/types.h>
#include <sys/param.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_fault.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pmap.h>
#include <uvm/uvm_prot.h>
#endif

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/err.h>
#include <asm/param.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
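	 *
	 * (For tiled objects the GTT mmap was also zapped above, so the next
	 * CPU fault goes back through i915_gem_fault() and acquires a valid
	 * fence before the pages are touched again.)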
89 */ 90 obj->fence_dirty = false; 91 obj->fence_reg = I915_FENCE_REG_NONE; 92 } 93 94 /* some bookkeeping */ 95 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 96 size_t size) 97 { 98 dev_priv->mm.object_count++; 99 dev_priv->mm.object_memory += size; 100 } 101 102 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 103 size_t size) 104 { 105 dev_priv->mm.object_count--; 106 dev_priv->mm.object_memory -= size; 107 } 108 109 static int 110 i915_gem_wait_for_error(struct drm_device *dev) 111 { 112 struct drm_i915_private *dev_priv = dev->dev_private; 113 struct completion *x = &dev_priv->error_completion; 114 #ifndef __NetBSD__ 115 unsigned long flags; 116 #endif 117 int ret; 118 119 if (!atomic_read(&dev_priv->mm.wedged)) 120 return 0; 121 122 /* 123 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 124 * userspace. If it takes that long something really bad is going on and 125 * we should simply try to bail out and fail as gracefully as possible. 126 */ 127 ret = wait_for_completion_interruptible_timeout(x, 10*HZ); 128 if (ret == 0) { 129 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 130 return -EIO; 131 } else if (ret < 0) { 132 return ret; 133 } 134 135 if (atomic_read(&dev_priv->mm.wedged)) { 136 /* GPU is hung, bump the completion count to account for 137 * the token we just consumed so that we never hit zero and 138 * end up waiting upon a subsequent completion event that 139 * will never happen. 140 */ 141 #ifdef __NetBSD__ 142 /* XXX Hope it's not a problem that we might wake someone. */ 143 complete(x); 144 #else 145 spin_lock_irqsave(&x->wait.lock, flags); 146 x->done++; 147 spin_unlock_irqrestore(&x->wait.lock, flags); 148 #endif 149 } 150 return 0; 151 } 152 153 int i915_mutex_lock_interruptible(struct drm_device *dev) 154 { 155 int ret; 156 157 ret = i915_gem_wait_for_error(dev); 158 if (ret) 159 return ret; 160 161 ret = mutex_lock_interruptible(&dev->struct_mutex); 162 if (ret) 163 return ret; 164 165 WARN_ON(i915_verify_lists(dev)); 166 return 0; 167 } 168 169 static inline bool 170 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 171 { 172 return obj->gtt_space && !obj->active; 173 } 174 175 int 176 i915_gem_init_ioctl(struct drm_device *dev, void *data, 177 struct drm_file *file) 178 { 179 struct drm_i915_gem_init *args = data; 180 181 if (drm_core_check_feature(dev, DRIVER_MODESET)) 182 return -ENODEV; 183 184 if (args->gtt_start >= args->gtt_end || 185 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 186 return -EINVAL; 187 188 /* GEM with user mode setting was never supported on ilk and later. 
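	 * The gen >= 5 check below covers Ironlake ("ilk") and everything newer.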
*/ 189 if (INTEL_INFO(dev)->gen >= 5) 190 return -ENODEV; 191 192 mutex_lock(&dev->struct_mutex); 193 i915_gem_init_global_gtt(dev, args->gtt_start, 194 args->gtt_end, args->gtt_end); 195 mutex_unlock(&dev->struct_mutex); 196 197 return 0; 198 } 199 200 int 201 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 202 struct drm_file *file) 203 { 204 struct drm_i915_private *dev_priv = dev->dev_private; 205 struct drm_i915_gem_get_aperture *args = data; 206 struct drm_i915_gem_object *obj; 207 size_t pinned; 208 209 pinned = 0; 210 mutex_lock(&dev->struct_mutex); 211 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) 212 if (obj->pin_count) 213 pinned += obj->gtt_space->size; 214 mutex_unlock(&dev->struct_mutex); 215 216 args->aper_size = dev_priv->mm.gtt_total; 217 args->aper_available_size = args->aper_size - pinned; 218 219 return 0; 220 } 221 222 static int 223 i915_gem_create(struct drm_file *file, 224 struct drm_device *dev, 225 uint64_t size, 226 uint32_t *handle_p) 227 { 228 struct drm_i915_gem_object *obj; 229 int ret; 230 u32 handle; 231 232 size = roundup(size, PAGE_SIZE); 233 if (size == 0) 234 return -EINVAL; 235 236 /* Allocate the new object */ 237 obj = i915_gem_alloc_object(dev, size); 238 if (obj == NULL) 239 return -ENOMEM; 240 241 ret = drm_gem_handle_create(file, &obj->base, &handle); 242 if (ret) { 243 drm_gem_object_release(&obj->base); 244 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 245 kfree(obj); 246 return ret; 247 } 248 249 /* drop reference from allocate - handle holds it now */ 250 drm_gem_object_unreference(&obj->base); 251 trace_i915_gem_object_create(obj); 252 253 *handle_p = handle; 254 return 0; 255 } 256 257 int 258 i915_gem_dumb_create(struct drm_file *file, 259 struct drm_device *dev, 260 struct drm_mode_create_dumb *args) 261 { 262 /* have to work out size/pitch and return them */ 263 #ifdef __NetBSD__ /* ALIGN already means something. */ 264 args->pitch = round_up(args->width * ((args->bpp + 7) / 8), 64); 265 #else 266 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 267 #endif 268 args->size = args->pitch * args->height; 269 return i915_gem_create(file, dev, 270 args->size, &args->handle); 271 } 272 273 int i915_gem_dumb_destroy(struct drm_file *file, 274 struct drm_device *dev, 275 uint32_t handle) 276 { 277 return drm_gem_handle_delete(file, handle); 278 } 279 280 /** 281 * Creates a new mm object and returns a handle to it. 
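 *
 * The requested size is rounded up to whole pages by i915_gem_create()
 * (e.g. with 4 KB pages, a 4000-byte request backs the object with a
 * single 4096-byte page).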
282 */ 283 int 284 i915_gem_create_ioctl(struct drm_device *dev, void *data, 285 struct drm_file *file) 286 { 287 struct drm_i915_gem_create *args = data; 288 289 return i915_gem_create(file, dev, 290 args->size, &args->handle); 291 } 292 293 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) 294 { 295 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 296 297 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 298 obj->tiling_mode != I915_TILING_NONE; 299 } 300 301 static inline int 302 __copy_to_user_swizzled(char __user *cpu_vaddr, 303 const char *gpu_vaddr, int gpu_offset, 304 int length) 305 { 306 int ret, cpu_offset = 0; 307 308 while (length > 0) { 309 #ifdef __NetBSD__ 310 int cacheline_end = round_up(gpu_offset + 1, 64); 311 #else 312 int cacheline_end = ALIGN(gpu_offset + 1, 64); 313 #endif 314 int this_length = min(cacheline_end - gpu_offset, length); 315 int swizzled_gpu_offset = gpu_offset ^ 64; 316 317 ret = __copy_to_user(cpu_vaddr + cpu_offset, 318 gpu_vaddr + swizzled_gpu_offset, 319 this_length); 320 if (ret) 321 return ret + length; 322 323 cpu_offset += this_length; 324 gpu_offset += this_length; 325 length -= this_length; 326 } 327 328 return 0; 329 } 330 331 static inline int 332 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 333 const char __user *cpu_vaddr, 334 int length) 335 { 336 int ret, cpu_offset = 0; 337 338 while (length > 0) { 339 #ifdef __NetBSD__ 340 int cacheline_end = round_up(gpu_offset + 1, 64); 341 #else 342 int cacheline_end = ALIGN(gpu_offset + 1, 64); 343 #endif 344 int this_length = min(cacheline_end - gpu_offset, length); 345 int swizzled_gpu_offset = gpu_offset ^ 64; 346 347 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 348 cpu_vaddr + cpu_offset, 349 this_length); 350 if (ret) 351 return ret + length; 352 353 cpu_offset += this_length; 354 gpu_offset += this_length; 355 length -= this_length; 356 } 357 358 return 0; 359 } 360 361 /* Per-page copy function for the shmem pread fastpath. 362 * Flushes invalid cachelines before reading the target if 363 * needs_clflush is set. */ 364 static int 365 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 366 char __user *user_data, 367 bool page_do_bit17_swizzling, bool needs_clflush) 368 { 369 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 370 return -EFAULT; 371 #else 372 char *vaddr; 373 int ret; 374 375 if (unlikely(page_do_bit17_swizzling)) 376 return -EINVAL; 377 378 vaddr = kmap_atomic(page); 379 if (needs_clflush) 380 drm_clflush_virt_range(vaddr + shmem_page_offset, 381 page_length); 382 ret = __copy_to_user_inatomic(user_data, 383 vaddr + shmem_page_offset, 384 page_length); 385 kunmap_atomic(vaddr); 386 387 return ret ? -EFAULT : 0; 388 #endif 389 } 390 391 static void 392 shmem_clflush_swizzled_range(char *addr, unsigned long length, 393 bool swizzled) 394 { 395 if (unlikely(swizzled)) { 396 unsigned long start = (unsigned long) addr; 397 unsigned long end = (unsigned long) addr + length; 398 399 /* For swizzling simply ensure that we always flush both 400 * channels. Lame, but simple and it works. Swizzled 401 * pwrite/pread is far from a hotpath - current userspace 402 * doesn't use it at all. 
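		 *
		 * Rounding the range out to 128 bytes covers each 64-byte
		 * cacheline together with its bit-17 partner at addr ^ 64,
		 * so whichever of the two locations holds the data is flushed.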
*/ 403 start = round_down(start, 128); 404 end = round_up(end, 128); 405 406 drm_clflush_virt_range((void *)start, end - start); 407 } else { 408 drm_clflush_virt_range(addr, length); 409 } 410 411 } 412 413 /* Only difference to the fast-path function is that this can handle bit17 414 * and uses non-atomic copy and kmap functions. */ 415 static int 416 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 417 char __user *user_data, 418 bool page_do_bit17_swizzling, bool needs_clflush) 419 { 420 char *vaddr; 421 int ret; 422 423 vaddr = kmap(page); 424 if (needs_clflush) 425 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 426 page_length, 427 page_do_bit17_swizzling); 428 429 if (page_do_bit17_swizzling) 430 ret = __copy_to_user_swizzled(user_data, 431 vaddr, shmem_page_offset, 432 page_length); 433 else 434 ret = __copy_to_user(user_data, 435 vaddr + shmem_page_offset, 436 page_length); 437 kunmap(page); 438 439 return ret ? - EFAULT : 0; 440 } 441 442 static int 443 i915_gem_shmem_pread(struct drm_device *dev, 444 struct drm_i915_gem_object *obj, 445 struct drm_i915_gem_pread *args, 446 struct drm_file *file) 447 { 448 char __user *user_data; 449 ssize_t remain; 450 loff_t offset; 451 int shmem_page_offset, page_length, ret = 0; 452 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 453 int hit_slowpath = 0; 454 #ifndef __NetBSD__ /* XXX */ 455 int prefaulted = 0; 456 #endif 457 int needs_clflush = 0; 458 #ifndef __NetBSD__ 459 struct scatterlist *sg; 460 int i; 461 #endif 462 463 user_data = (char __user *) (uintptr_t) args->data_ptr; 464 remain = args->size; 465 466 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 467 468 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 469 /* If we're not in the cpu read domain, set ourself into the gtt 470 * read domain and manually flush cachelines (if required). This 471 * optimizes for the case when the gpu will dirty the data 472 * anyway again before the next pread happens. */ 473 if (obj->cache_level == I915_CACHE_NONE) 474 needs_clflush = 1; 475 if (obj->gtt_space) { 476 ret = i915_gem_object_set_to_gtt_domain(obj, false); 477 if (ret) 478 return ret; 479 } 480 } 481 482 ret = i915_gem_object_get_pages(obj); 483 if (ret) 484 return ret; 485 486 i915_gem_object_pin_pages(obj); 487 488 offset = args->offset; 489 490 #ifdef __NetBSD__ 491 /* 492 * XXX This is a big #ifdef with a lot of duplicated code, but 493 * factoring out the loop head -- which is all that 494 * substantially differs -- is probably more trouble than it's 495 * worth at the moment. 496 */ 497 while (0 < remain) { 498 /* Get the next page. */ 499 shmem_page_offset = offset_in_page(offset); 500 KASSERT(shmem_page_offset < PAGE_SIZE); 501 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 502 struct page *const page = i915_gem_object_get_page(obj, 503 atop(offset)); 504 505 /* Decide whether to swizzle bit 17. */ 506 page_do_bit17_swizzling = obj_do_bit17_swizzling && 507 (page_to_phys(page) & (1 << 17)) != 0; 508 509 /* Try the fast path. */ 510 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 511 user_data, page_do_bit17_swizzling, needs_clflush); 512 if (ret == 0) 513 goto next_page; 514 515 /* Fast path failed. Try the slow path. 
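		 * struct_mutex is dropped around the non-atomic copy below,
		 * since copying out to user space may fault and sleep.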
*/ 516 hit_slowpath = 1; 517 mutex_unlock(&dev->struct_mutex); 518 /* XXX prefault */ 519 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 520 user_data, page_do_bit17_swizzling, needs_clflush); 521 mutex_lock(&dev->struct_mutex); 522 523 next_page: 524 /* XXX mark page accessed */ 525 if (ret) 526 goto out; 527 528 KASSERT(page_length <= remain); 529 remain -= page_length; 530 user_data += page_length; 531 offset += page_length; 532 } 533 #else 534 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 535 struct page *page; 536 537 if (i < offset >> PAGE_SHIFT) 538 continue; 539 540 if (remain <= 0) 541 break; 542 543 /* Operation in this page 544 * 545 * shmem_page_offset = offset within page in shmem file 546 * page_length = bytes to copy for this page 547 */ 548 shmem_page_offset = offset_in_page(offset); 549 page_length = remain; 550 if ((shmem_page_offset + page_length) > PAGE_SIZE) 551 page_length = PAGE_SIZE - shmem_page_offset; 552 553 page = sg_page(sg); 554 page_do_bit17_swizzling = obj_do_bit17_swizzling && 555 (page_to_phys(page) & (1 << 17)) != 0; 556 557 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 558 user_data, page_do_bit17_swizzling, 559 needs_clflush); 560 if (ret == 0) 561 goto next_page; 562 563 hit_slowpath = 1; 564 mutex_unlock(&dev->struct_mutex); 565 566 if (!prefaulted) { 567 ret = fault_in_multipages_writeable(user_data, remain); 568 /* Userspace is tricking us, but we've already clobbered 569 * its pages with the prefault and promised to write the 570 * data up to the first fault. Hence ignore any errors 571 * and just continue. */ 572 (void)ret; 573 prefaulted = 1; 574 } 575 576 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 577 user_data, page_do_bit17_swizzling, 578 needs_clflush); 579 580 mutex_lock(&dev->struct_mutex); 581 582 next_page: 583 mark_page_accessed(page); 584 585 if (ret) 586 goto out; 587 588 remain -= page_length; 589 user_data += page_length; 590 offset += page_length; 591 } 592 #endif 593 594 out: 595 i915_gem_object_unpin_pages(obj); 596 597 if (hit_slowpath) { 598 /* Fixup: Kill any reinstated backing storage pages */ 599 if (obj->madv == __I915_MADV_PURGED) 600 i915_gem_object_truncate(obj); 601 } 602 603 return ret; 604 } 605 606 /** 607 * Reads data from the object referenced by handle. 608 * 609 * On error, the contents of *data are undefined. 610 */ 611 int 612 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 613 struct drm_file *file) 614 { 615 struct drm_i915_gem_pread *args = data; 616 struct drm_i915_gem_object *obj; 617 int ret = 0; 618 619 if (args->size == 0) 620 return 0; 621 622 if (!access_ok(VERIFY_WRITE, 623 (char __user *)(uintptr_t)args->data_ptr, 624 args->size)) 625 return -EFAULT; 626 627 ret = i915_mutex_lock_interruptible(dev); 628 if (ret) 629 return ret; 630 631 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 632 if (&obj->base == NULL) { 633 ret = -ENOENT; 634 goto unlock; 635 } 636 637 /* Bounds check source. */ 638 if (args->offset > obj->base.size || 639 args->size > obj->base.size - args->offset) { 640 ret = -EINVAL; 641 goto out; 642 } 643 644 #ifndef __NetBSD__ /* XXX drm prime */ 645 /* prime objects have no backing filp to GEM pread/pwrite 646 * pages from. 
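	 * Such dma-buf imports are simply rejected here with -EINVAL.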
647 */ 648 if (!obj->base.filp) { 649 ret = -EINVAL; 650 goto out; 651 } 652 #endif 653 654 trace_i915_gem_object_pread(obj, args->offset, args->size); 655 656 ret = i915_gem_shmem_pread(dev, obj, args, file); 657 658 out: 659 drm_gem_object_unreference(&obj->base); 660 unlock: 661 mutex_unlock(&dev->struct_mutex); 662 return ret; 663 } 664 665 /* This is the fast write path which cannot handle 666 * page faults in the source data 667 */ 668 669 static inline int 670 fast_user_write(struct io_mapping *mapping, 671 loff_t page_base, int page_offset, 672 char __user *user_data, 673 int length) 674 { 675 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 676 return -EFAULT; 677 #else 678 void __iomem *vaddr_atomic; 679 void *vaddr; 680 unsigned long unwritten; 681 682 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 683 /* We can use the cpu mem copy function because this is X86. */ 684 vaddr = (void __force*)vaddr_atomic + page_offset; 685 unwritten = __copy_from_user_inatomic_nocache(vaddr, 686 user_data, length); 687 io_mapping_unmap_atomic(vaddr_atomic); 688 return unwritten; 689 #endif 690 } 691 692 /** 693 * This is the fast pwrite path, where we copy the data directly from the 694 * user into the GTT, uncached. 695 */ 696 static int 697 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 698 struct drm_i915_gem_object *obj, 699 struct drm_i915_gem_pwrite *args, 700 struct drm_file *file) 701 { 702 drm_i915_private_t *dev_priv = dev->dev_private; 703 ssize_t remain; 704 loff_t offset, page_base; 705 char __user *user_data; 706 int page_offset, page_length, ret; 707 708 ret = i915_gem_object_pin(obj, 0, true, true); 709 if (ret) 710 goto out; 711 712 ret = i915_gem_object_set_to_gtt_domain(obj, true); 713 if (ret) 714 goto out_unpin; 715 716 ret = i915_gem_object_put_fence(obj); 717 if (ret) 718 goto out_unpin; 719 720 user_data = (char __user *) (uintptr_t) args->data_ptr; 721 remain = args->size; 722 723 offset = obj->gtt_offset + args->offset; 724 725 while (remain > 0) { 726 /* Operation in this page 727 * 728 * page_base = page offset within aperture 729 * page_offset = offset within page 730 * page_length = bytes to copy for this page 731 */ 732 page_base = offset & PAGE_MASK; 733 page_offset = offset_in_page(offset); 734 page_length = remain; 735 if ((page_offset + remain) > PAGE_SIZE) 736 page_length = PAGE_SIZE - page_offset; 737 738 /* If we get a fault while copying data, then (presumably) our 739 * source page isn't available. Return the error and we'll 740 * retry in the slow path. 741 */ 742 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 743 page_offset, user_data, page_length)) { 744 ret = -EFAULT; 745 goto out_unpin; 746 } 747 748 remain -= page_length; 749 user_data += page_length; 750 offset += page_length; 751 } 752 753 out_unpin: 754 i915_gem_object_unpin(obj); 755 out: 756 return ret; 757 } 758 759 /* Per-page copy function for the shmem pwrite fastpath. 760 * Flushes invalid cachelines before writing to the target if 761 * needs_clflush_before is set and flushes out any written cachelines after 762 * writing if needs_clflush is set. 
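 * ("needs_clflush" in the last clause refers to the needs_clflush_after
 * argument.)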
*/ 763 static int 764 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 765 char __user *user_data, 766 bool page_do_bit17_swizzling, 767 bool needs_clflush_before, 768 bool needs_clflush_after) 769 { 770 #ifdef __NetBSD__ 771 return -EFAULT; 772 #else 773 char *vaddr; 774 int ret; 775 776 if (unlikely(page_do_bit17_swizzling)) 777 return -EINVAL; 778 779 vaddr = kmap_atomic(page); 780 if (needs_clflush_before) 781 drm_clflush_virt_range(vaddr + shmem_page_offset, 782 page_length); 783 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 784 user_data, 785 page_length); 786 if (needs_clflush_after) 787 drm_clflush_virt_range(vaddr + shmem_page_offset, 788 page_length); 789 kunmap_atomic(vaddr); 790 791 return ret ? -EFAULT : 0; 792 #endif 793 } 794 795 /* Only difference to the fast-path function is that this can handle bit17 796 * and uses non-atomic copy and kmap functions. */ 797 static int 798 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 799 char __user *user_data, 800 bool page_do_bit17_swizzling, 801 bool needs_clflush_before, 802 bool needs_clflush_after) 803 { 804 char *vaddr; 805 int ret; 806 807 vaddr = kmap(page); 808 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 809 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 810 page_length, 811 page_do_bit17_swizzling); 812 if (page_do_bit17_swizzling) 813 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 814 user_data, 815 page_length); 816 else 817 ret = __copy_from_user(vaddr + shmem_page_offset, 818 user_data, 819 page_length); 820 if (needs_clflush_after) 821 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 822 page_length, 823 page_do_bit17_swizzling); 824 kunmap(page); 825 826 return ret ? -EFAULT : 0; 827 } 828 829 static int 830 i915_gem_shmem_pwrite(struct drm_device *dev, 831 struct drm_i915_gem_object *obj, 832 struct drm_i915_gem_pwrite *args, 833 struct drm_file *file) 834 { 835 ssize_t remain; 836 loff_t offset; 837 char __user *user_data; 838 int shmem_page_offset, page_length, ret = 0; 839 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 840 int hit_slowpath = 0; 841 int needs_clflush_after = 0; 842 int needs_clflush_before = 0; 843 #ifndef __NetBSD__ 844 int i; 845 struct scatterlist *sg; 846 #endif 847 848 user_data = (char __user *) (uintptr_t) args->data_ptr; 849 remain = args->size; 850 851 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 852 853 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 854 /* If we're not in the cpu write domain, set ourself into the gtt 855 * write domain and manually flush cachelines (if required). This 856 * optimizes for the case when the gpu will use the data 857 * right away and we therefore have to clflush anyway. */ 858 if (obj->cache_level == I915_CACHE_NONE) 859 needs_clflush_after = 1; 860 if (obj->gtt_space) { 861 ret = i915_gem_object_set_to_gtt_domain(obj, true); 862 if (ret) 863 return ret; 864 } 865 } 866 /* Same trick applies for invalidate partially written cachelines before 867 * writing. */ 868 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) 869 && obj->cache_level == I915_CACHE_NONE) 870 needs_clflush_before = 1; 871 872 ret = i915_gem_object_get_pages(obj); 873 if (ret) 874 return ret; 875 876 i915_gem_object_pin_pages(obj); 877 878 offset = args->offset; 879 obj->dirty = 1; 880 881 #ifdef __NetBSD__ 882 while (0 < remain) { 883 /* Get the next page. 
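		 * The copy length is clamped so that each pass through the
		 * loop stays within a single shmem page.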
*/ 884 shmem_page_offset = offset_in_page(offset); 885 KASSERT(shmem_page_offset < PAGE_SIZE); 886 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 887 struct page *const page = i915_gem_object_get_page(obj, 888 atop(offset)); 889 890 /* Decide whether to flush the cache or swizzle bit 17. */ 891 const bool partial_cacheline_write = needs_clflush_before && 892 ((shmem_page_offset | page_length) 893 & (cpu_info_primary.ci_cflush_lsize - 1)); 894 page_do_bit17_swizzling = obj_do_bit17_swizzling && 895 (page_to_phys(page) & (1 << 17)) != 0; 896 897 /* Try the fast path. */ 898 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 899 user_data, page_do_bit17_swizzling, 900 partial_cacheline_write, needs_clflush_after); 901 if (ret == 0) 902 goto next_page; 903 904 /* Fast path failed. Try the slow path. */ 905 hit_slowpath = 1; 906 mutex_unlock(&dev->struct_mutex); 907 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 908 user_data, page_do_bit17_swizzling, 909 partial_cacheline_write, needs_clflush_after); 910 mutex_lock(&dev->struct_mutex); 911 912 next_page: 913 page->p_vmp.flags &= ~PG_CLEAN; 914 /* XXX mark page accessed */ 915 if (ret) 916 goto out; 917 918 KASSERT(page_length <= remain); 919 remain -= page_length; 920 user_data += page_length; 921 offset += page_length; 922 } 923 #else 924 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 925 struct page *page; 926 int partial_cacheline_write; 927 928 if (i < offset >> PAGE_SHIFT) 929 continue; 930 931 if (remain <= 0) 932 break; 933 934 /* Operation in this page 935 * 936 * shmem_page_offset = offset within page in shmem file 937 * page_length = bytes to copy for this page 938 */ 939 shmem_page_offset = offset_in_page(offset); 940 941 page_length = remain; 942 if ((shmem_page_offset + page_length) > PAGE_SIZE) 943 page_length = PAGE_SIZE - shmem_page_offset; 944 945 /* If we don't overwrite a cacheline completely we need to be 946 * careful to have up-to-date data by first clflushing. Don't 947 * overcomplicate things and flush the entire patch. */ 948 partial_cacheline_write = needs_clflush_before && 949 ((shmem_page_offset | page_length) 950 & (boot_cpu_data.x86_clflush_size - 1)); 951 952 page = sg_page(sg); 953 page_do_bit17_swizzling = obj_do_bit17_swizzling && 954 (page_to_phys(page) & (1 << 17)) != 0; 955 956 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 957 user_data, page_do_bit17_swizzling, 958 partial_cacheline_write, 959 needs_clflush_after); 960 if (ret == 0) 961 goto next_page; 962 963 hit_slowpath = 1; 964 mutex_unlock(&dev->struct_mutex); 965 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 966 user_data, page_do_bit17_swizzling, 967 partial_cacheline_write, 968 needs_clflush_after); 969 970 mutex_lock(&dev->struct_mutex); 971 972 next_page: 973 set_page_dirty(page); 974 mark_page_accessed(page); 975 976 if (ret) 977 goto out; 978 979 remain -= page_length; 980 user_data += page_length; 981 offset += page_length; 982 } 983 #endif 984 985 out: 986 i915_gem_object_unpin_pages(obj); 987 988 if (hit_slowpath) { 989 /* Fixup: Kill any reinstated backing storage pages */ 990 if (obj->madv == __I915_MADV_PURGED) 991 i915_gem_object_truncate(obj); 992 /* and flush dirty cachelines in case the object isn't in the cpu write 993 * domain anymore. 
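		 * (clflush pushes the CPU's dirty cachelines out to memory;
		 * the chipset flush that follows makes those writes visible
		 * to the GPU.)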
*/ 994 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 995 i915_gem_clflush_object(obj); 996 i915_gem_chipset_flush(dev); 997 } 998 } 999 1000 if (needs_clflush_after) 1001 i915_gem_chipset_flush(dev); 1002 1003 return ret; 1004 } 1005 1006 /** 1007 * Writes data to the object referenced by handle. 1008 * 1009 * On error, the contents of the buffer that were to be modified are undefined. 1010 */ 1011 int 1012 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1013 struct drm_file *file) 1014 { 1015 struct drm_i915_gem_pwrite *args = data; 1016 struct drm_i915_gem_object *obj; 1017 int ret; 1018 1019 if (args->size == 0) 1020 return 0; 1021 1022 if (!access_ok(VERIFY_READ, 1023 (char __user *)(uintptr_t)args->data_ptr, 1024 args->size)) 1025 return -EFAULT; 1026 1027 #ifndef __NetBSD__ /* XXX prefault */ 1028 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr, 1029 args->size); 1030 if (ret) 1031 return -EFAULT; 1032 #endif 1033 1034 ret = i915_mutex_lock_interruptible(dev); 1035 if (ret) 1036 return ret; 1037 1038 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1039 if (&obj->base == NULL) { 1040 ret = -ENOENT; 1041 goto unlock; 1042 } 1043 1044 /* Bounds check destination. */ 1045 if (args->offset > obj->base.size || 1046 args->size > obj->base.size - args->offset) { 1047 ret = -EINVAL; 1048 goto out; 1049 } 1050 1051 #ifndef __NetBSD__ /* XXX drm prime */ 1052 /* prime objects have no backing filp to GEM pread/pwrite 1053 * pages from. 1054 */ 1055 if (!obj->base.filp) { 1056 ret = -EINVAL; 1057 goto out; 1058 } 1059 #endif 1060 1061 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1062 1063 ret = -EFAULT; 1064 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1065 * it would end up going through the fenced access, and we'll get 1066 * different detiling behavior between reading and writing. 1067 * pread/pwrite currently are reading and writing from the CPU 1068 * perspective, requiring manual detiling by the client. 1069 */ 1070 if (obj->phys_obj) { 1071 ret = i915_gem_phys_pwrite(dev, obj, args, file); 1072 goto out; 1073 } 1074 1075 if (obj->cache_level == I915_CACHE_NONE && 1076 obj->tiling_mode == I915_TILING_NONE && 1077 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1078 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1079 /* Note that the gtt paths might fail with non-page-backed user 1080 * pointers (e.g. gtt mappings when moving data between 1081 * textures). Fallback to the shmem path in that case. */ 1082 } 1083 1084 if (ret == -EFAULT || ret == -ENOSPC) 1085 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1086 1087 out: 1088 drm_gem_object_unreference(&obj->base); 1089 unlock: 1090 mutex_unlock(&dev->struct_mutex); 1091 return ret; 1092 } 1093 1094 int 1095 i915_gem_check_wedge(struct drm_i915_private *dev_priv, 1096 bool interruptible) 1097 { 1098 if (atomic_read(&dev_priv->mm.wedged)) { 1099 struct completion *x = &dev_priv->error_completion; 1100 bool recovery_complete; 1101 #ifndef __NetBSD__ 1102 unsigned long flags; 1103 #endif 1104 1105 #ifdef __NetBSD__ 1106 /* 1107 * XXX This is a horrible kludge. Reading internal 1108 * fields is no good, nor is reading them unlocked, and 1109 * neither is locking it and then unlocking it before 1110 * making a decision. 1111 */ 1112 recovery_complete = x->c_done > 0; 1113 #else 1114 /* Give the error handler a chance to run. 
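		 * Peek at the completion count under its wait-queue lock:
		 * done > 0 means the reset handler has signalled completion
		 * at least once.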
*/ 1115 spin_lock_irqsave(&x->wait.lock, flags); 1116 recovery_complete = x->done > 0; 1117 spin_unlock_irqrestore(&x->wait.lock, flags); 1118 #endif 1119 1120 /* Non-interruptible callers can't handle -EAGAIN, hence return 1121 * -EIO unconditionally for these. */ 1122 if (!interruptible) 1123 return -EIO; 1124 1125 /* Recovery complete, but still wedged means reset failure. */ 1126 if (recovery_complete) 1127 return -EIO; 1128 1129 return -EAGAIN; 1130 } 1131 1132 return 0; 1133 } 1134 1135 /* 1136 * Compare seqno against outstanding lazy request. Emit a request if they are 1137 * equal. 1138 */ 1139 static int 1140 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 1141 { 1142 int ret; 1143 1144 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1145 1146 ret = 0; 1147 if (seqno == ring->outstanding_lazy_request) 1148 ret = i915_add_request(ring, NULL, NULL); 1149 1150 return ret; 1151 } 1152 1153 /** 1154 * __wait_seqno - wait until execution of seqno has finished 1155 * @ring: the ring expected to report seqno 1156 * @seqno: duh! 1157 * @interruptible: do an interruptible wait (normally yes) 1158 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1159 * 1160 * Returns 0 if the seqno was found within the alloted time. Else returns the 1161 * errno with remaining time filled in timeout argument. 1162 */ 1163 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1164 bool interruptible, struct timespec *timeout) 1165 { 1166 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1167 struct timespec before, now, wait_time={1,0}; 1168 unsigned long timeout_jiffies; 1169 long end; 1170 bool wait_forever = true; 1171 int ret; 1172 1173 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1174 return 0; 1175 1176 trace_i915_gem_request_wait_begin(ring, seqno); 1177 1178 if (timeout != NULL) { 1179 wait_time = *timeout; 1180 wait_forever = false; 1181 } 1182 1183 timeout_jiffies = timespec_to_jiffies(&wait_time); 1184 1185 if (WARN_ON(!ring->irq_get(ring))) 1186 return -ENODEV; 1187 1188 /* Record current time in case interrupted by signal, or wedged * */ 1189 getrawmonotonic(&before); 1190 1191 #define EXIT_COND \ 1192 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ 1193 atomic_read(&dev_priv->mm.wedged)) 1194 do { 1195 #ifdef __NetBSD__ 1196 unsigned long flags; 1197 spin_lock_irqsave(&dev_priv->irq_lock, flags); 1198 if (interruptible) 1199 DRM_SPIN_TIMED_WAIT_UNTIL(end, &ring->irq_queue, 1200 &dev_priv->irq_lock, 1201 timeout_jiffies, 1202 EXIT_COND); 1203 else 1204 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(end, &ring->irq_queue, 1205 &dev_priv->irq_lock, 1206 timeout_jiffies, 1207 EXIT_COND); 1208 spin_unlock_irqrestore(&dev_priv->irq_lock, flags); 1209 #else 1210 if (interruptible) 1211 end = wait_event_interruptible_timeout(ring->irq_queue, 1212 EXIT_COND, 1213 timeout_jiffies); 1214 else 1215 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 1216 timeout_jiffies); 1217 1218 #endif 1219 ret = i915_gem_check_wedge(dev_priv, interruptible); 1220 if (ret) 1221 end = ret; 1222 } while (end == 0 && wait_forever); 1223 1224 getrawmonotonic(&now); 1225 1226 ring->irq_put(ring); 1227 trace_i915_gem_request_wait_end(ring, seqno); 1228 #undef EXIT_COND 1229 1230 if (timeout) { 1231 struct timespec sleep_time = timespec_sub(now, before); 1232 *timeout = timespec_sub(*timeout, sleep_time); 1233 } 1234 1235 switch (end) { 1236 case -EIO: 1237 case -EAGAIN: /* Wedged */ 1238 case -ERESTARTSYS: /* Signal */ 1239 case -EINTR: 1240 
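		/* Wedged or interrupted: hand the wait result back to the caller. */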
return (int)end; 1241 case 0: /* Timeout */ 1242 if (timeout) 1243 set_normalized_timespec(timeout, 0, 0); 1244 return -ETIME; 1245 default: /* Completed */ 1246 WARN_ON(end < 0); /* We're not aware of other errors */ 1247 return 0; 1248 } 1249 } 1250 1251 /** 1252 * Waits for a sequence number to be signaled, and cleans up the 1253 * request and object lists appropriately for that event. 1254 */ 1255 int 1256 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) 1257 { 1258 struct drm_device *dev = ring->dev; 1259 struct drm_i915_private *dev_priv = dev->dev_private; 1260 bool interruptible = dev_priv->mm.interruptible; 1261 int ret; 1262 1263 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1264 BUG_ON(seqno == 0); 1265 1266 ret = i915_gem_check_wedge(dev_priv, interruptible); 1267 if (ret) 1268 return ret; 1269 1270 ret = i915_gem_check_olr(ring, seqno); 1271 if (ret) 1272 return ret; 1273 1274 return __wait_seqno(ring, seqno, interruptible, NULL); 1275 } 1276 1277 /** 1278 * Ensures that all rendering to the object has completed and the object is 1279 * safe to unbind from the GTT or access from the CPU. 1280 */ 1281 static __must_check int 1282 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1283 bool readonly) 1284 { 1285 struct intel_ring_buffer *ring = obj->ring; 1286 u32 seqno; 1287 int ret; 1288 1289 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1290 if (seqno == 0) 1291 return 0; 1292 1293 ret = i915_wait_seqno(ring, seqno); 1294 if (ret) 1295 return ret; 1296 1297 i915_gem_retire_requests_ring(ring); 1298 1299 /* Manually manage the write flush as we may have not yet 1300 * retired the buffer. 1301 */ 1302 if (obj->last_write_seqno && 1303 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1304 obj->last_write_seqno = 0; 1305 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1306 } 1307 1308 return 0; 1309 } 1310 1311 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1312 * as the object state may change during this call. 1313 */ 1314 static __must_check int 1315 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1316 bool readonly) 1317 { 1318 struct drm_device *dev = obj->base.dev; 1319 struct drm_i915_private *dev_priv = dev->dev_private; 1320 struct intel_ring_buffer *ring = obj->ring; 1321 u32 seqno; 1322 int ret; 1323 1324 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1325 BUG_ON(!dev_priv->mm.interruptible); 1326 1327 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1328 if (seqno == 0) 1329 return 0; 1330 1331 ret = i915_gem_check_wedge(dev_priv, true); 1332 if (ret) 1333 return ret; 1334 1335 ret = i915_gem_check_olr(ring, seqno); 1336 if (ret) 1337 return ret; 1338 1339 mutex_unlock(&dev->struct_mutex); 1340 ret = __wait_seqno(ring, seqno, true, NULL); 1341 mutex_lock(&dev->struct_mutex); 1342 1343 i915_gem_retire_requests_ring(ring); 1344 1345 /* Manually manage the write flush as we may have not yet 1346 * retired the buffer. 1347 */ 1348 if (obj->last_write_seqno && 1349 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1350 obj->last_write_seqno = 0; 1351 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1352 } 1353 1354 return ret; 1355 } 1356 1357 /** 1358 * Called when user space prepares to use an object with the CPU, either 1359 * through the mmap ioctl's mapping or a GTT mapping. 
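 *
 * Typical usage from userspace (illustrative only): set read_domains to
 * I915_GEM_DOMAIN_GTT (and write_domain likewise when writing) before
 * touching a GTT mapping, or I915_GEM_DOMAIN_CPU before touching a CPU
 * mapping, e.g.
 *
 *	struct drm_i915_gem_set_domain sd = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_GTT,
 *		.write_domain = I915_GEM_DOMAIN_GTT,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);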
1360 */ 1361 int 1362 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1363 struct drm_file *file) 1364 { 1365 struct drm_i915_gem_set_domain *args = data; 1366 struct drm_i915_gem_object *obj; 1367 uint32_t read_domains = args->read_domains; 1368 uint32_t write_domain = args->write_domain; 1369 int ret; 1370 1371 /* Only handle setting domains to types used by the CPU. */ 1372 if (write_domain & I915_GEM_GPU_DOMAINS) 1373 return -EINVAL; 1374 1375 if (read_domains & I915_GEM_GPU_DOMAINS) 1376 return -EINVAL; 1377 1378 /* Having something in the write domain implies it's in the read 1379 * domain, and only that read domain. Enforce that in the request. 1380 */ 1381 if (write_domain != 0 && read_domains != write_domain) 1382 return -EINVAL; 1383 1384 ret = i915_mutex_lock_interruptible(dev); 1385 if (ret) 1386 return ret; 1387 1388 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1389 if (&obj->base == NULL) { 1390 ret = -ENOENT; 1391 goto unlock; 1392 } 1393 1394 /* Try to flush the object off the GPU without holding the lock. 1395 * We will repeat the flush holding the lock in the normal manner 1396 * to catch cases where we are gazumped. 1397 */ 1398 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 1399 if (ret) 1400 goto unref; 1401 1402 if (read_domains & I915_GEM_DOMAIN_GTT) { 1403 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1404 1405 /* Silently promote "you're not bound, there was nothing to do" 1406 * to success, since the client was just asking us to 1407 * make sure everything was done. 1408 */ 1409 if (ret == -EINVAL) 1410 ret = 0; 1411 } else { 1412 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1413 } 1414 1415 unref: 1416 drm_gem_object_unreference(&obj->base); 1417 unlock: 1418 mutex_unlock(&dev->struct_mutex); 1419 return ret; 1420 } 1421 1422 /** 1423 * Called when user space has done writes to this buffer 1424 */ 1425 int 1426 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1427 struct drm_file *file) 1428 { 1429 struct drm_i915_gem_sw_finish *args = data; 1430 struct drm_i915_gem_object *obj; 1431 int ret = 0; 1432 1433 ret = i915_mutex_lock_interruptible(dev); 1434 if (ret) 1435 return ret; 1436 1437 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1438 if (&obj->base == NULL) { 1439 ret = -ENOENT; 1440 goto unlock; 1441 } 1442 1443 /* Pinned buffers may be scanout, so flush the cache */ 1444 if (obj->pin_count) 1445 i915_gem_object_flush_cpu_write_domain(obj); 1446 1447 drm_gem_object_unreference(&obj->base); 1448 unlock: 1449 mutex_unlock(&dev->struct_mutex); 1450 return ret; 1451 } 1452 1453 /** 1454 * Maps the contents of an object, returning the address it is mapped 1455 * into. 1456 * 1457 * While the mapping holds a reference on the contents of the object, it doesn't 1458 * imply a ref on the object itself. 1459 */ 1460 int 1461 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1462 struct drm_file *file) 1463 { 1464 struct drm_i915_gem_mmap *args = data; 1465 struct drm_gem_object *obj; 1466 unsigned long addr; 1467 #ifdef __NetBSD__ 1468 int ret; 1469 #endif 1470 1471 obj = drm_gem_object_lookup(dev, file, args->handle); 1472 if (obj == NULL) 1473 return -ENOENT; 1474 1475 #ifndef __NetBSD__ /* XXX drm prime */ 1476 /* prime objects have no backing filp to GEM mmap 1477 * pages from. 
1478 */ 1479 if (!obj->filp) { 1480 drm_gem_object_unreference_unlocked(obj); 1481 return -EINVAL; 1482 } 1483 #endif 1484 1485 #ifdef __NetBSD__ 1486 addr = (*curproc->p_emul->e_vm_default_addr)(curproc, 1487 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size); 1488 /* XXX errno NetBSD->Linux */ 1489 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size, 1490 obj->gemo_shm_uao, args->offset, 0, 1491 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE), 1492 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL, 1493 0)); 1494 if (ret) { 1495 drm_gem_object_unreference_unlocked(obj); 1496 return ret; 1497 } 1498 uao_reference(obj->gemo_shm_uao); 1499 drm_gem_object_unreference_unlocked(obj); 1500 #else 1501 addr = vm_mmap(obj->filp, 0, args->size, 1502 PROT_READ | PROT_WRITE, MAP_SHARED, 1503 args->offset); 1504 drm_gem_object_unreference_unlocked(obj); 1505 if (IS_ERR((void *)addr)) 1506 return addr; 1507 #endif 1508 1509 args->addr_ptr = (uint64_t) addr; 1510 1511 return 0; 1512 } 1513 1514 #ifdef __NetBSD__ /* XXX gem gtt fault */ 1515 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t, 1516 struct vm_page **, int, int, vm_prot_t, int, paddr_t); 1517 1518 int 1519 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1520 int npages, int centeridx, vm_prot_t access_type, int flags) 1521 { 1522 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1523 struct drm_gem_object *gem_obj = 1524 container_of(uobj, struct drm_gem_object, gemo_uvmobj); 1525 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 1526 struct drm_device *dev = obj->base.dev; 1527 struct drm_i915_private *dev_priv = dev->dev_private; 1528 voff_t byte_offset; 1529 pgoff_t page_offset; 1530 int ret = 0; 1531 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0; 1532 1533 byte_offset = (ufi->entry->offset + (vaddr - ufi->entry->start)); 1534 KASSERT(byte_offset <= obj->base.size); 1535 page_offset = (byte_offset >> PAGE_SHIFT); 1536 1537 ret = i915_mutex_lock_interruptible(dev); 1538 if (ret) 1539 goto out; 1540 1541 trace_i915_gem_object_fault(obj, page_offset, true, write); 1542 1543 /* Now bind it into the GTT if needed */ 1544 ret = i915_gem_object_pin(obj, 0, true, false); 1545 if (ret) 1546 goto unlock; 1547 1548 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1549 if (ret) 1550 goto unpin; 1551 1552 ret = i915_gem_object_get_fence(obj); 1553 if (ret) 1554 goto unpin; 1555 1556 obj->fault_mappable = true; 1557 1558 /* Finally, remap it using the new GTT offset */ 1559 /* XXX errno NetBSD->Linux */ 1560 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type, 1561 flags, (dev_priv->mm.gtt_base_addr + obj->gtt_offset)); 1562 unpin: 1563 i915_gem_object_unpin(obj); 1564 unlock: 1565 mutex_unlock(&dev->struct_mutex); 1566 out: 1567 return ret; 1568 } 1569 1570 /* 1571 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c. 1572 * 1573 * XXX pmap_enter_default instead of pmap_enter because of a problem 1574 * with using weak aliases in kernel modules or something. 
1575 */ 1576 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned); 1577 1578 static int 1579 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1580 int npages, int centeridx, vm_prot_t access_type, int flags, 1581 paddr_t gtt_paddr) 1582 { 1583 struct vm_map_entry *entry = ufi->entry; 1584 struct uvm_object *uobj = entry->object.uvm_obj; 1585 vaddr_t curr_va; 1586 off_t curr_offset; 1587 paddr_t paddr; 1588 u_int mmapflags; 1589 int lcv, retval; 1590 vm_prot_t mapprot; 1591 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist); 1592 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0); 1593 1594 /* 1595 * we do not allow device mappings to be mapped copy-on-write 1596 * so we kill any attempt to do so here. 1597 */ 1598 1599 if (UVM_ET_ISCOPYONWRITE(entry)) { 1600 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", 1601 entry->etype, 0,0,0); 1602 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); 1603 return(EIO); 1604 } 1605 1606 /* 1607 * now we must determine the offset in udv to use and the VA to 1608 * use for pmap_enter. note that we always use orig_map's pmap 1609 * for pmap_enter (even if we have a submap). since virtual 1610 * addresses in a submap must match the main map, this is ok. 1611 */ 1612 1613 /* udv offset = (offset from start of entry) + entry's offset */ 1614 curr_offset = entry->offset + (vaddr - entry->start); 1615 /* pmap va = vaddr (virtual address of pps[0]) */ 1616 curr_va = vaddr; 1617 1618 /* 1619 * loop over the page range entering in as needed 1620 */ 1621 1622 retval = 0; 1623 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, 1624 curr_va += PAGE_SIZE) { 1625 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 1626 continue; 1627 1628 if (pps[lcv] == PGO_DONTCARE) 1629 continue; 1630 1631 paddr = (gtt_paddr + curr_offset); 1632 mmapflags = 0; 1633 mapprot = ufi->entry->protection; 1634 UVMHIST_LOG(maphist, 1635 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d", 1636 ufi->orig_map->pmap, curr_va, paddr, mapprot); 1637 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot, 1638 PMAP_CANFAIL | mapprot | mmapflags) != 0) { 1639 /* 1640 * pmap_enter() didn't have the resource to 1641 * enter this mapping. Unlock everything, 1642 * wait for the pagedaemon to free up some 1643 * pages, and then tell uvm_fault() to start 1644 * the fault again. 1645 * 1646 * XXX Needs some rethinking for the PGO_ALLPAGES 1647 * XXX case. 1648 */ 1649 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ 1650 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 1651 uobj); 1652 uvm_wait("i915flt"); 1653 return (ERESTART); 1654 } 1655 } 1656 1657 pmap_update(ufi->orig_map->pmap); 1658 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); 1659 return (retval); 1660 } 1661 #else 1662 /** 1663 * i915_gem_fault - fault a page into the GTT 1664 * vma: VMA in question 1665 * vmf: fault info 1666 * 1667 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1668 * from userspace. The fault handler takes care of binding the object to 1669 * the GTT (if needed), allocating and programming a fence register (again, 1670 * only if needed based on whether the old reg is still valid or the object 1671 * is tiled) and inserting a new PTE into the faulting process. 1672 * 1673 * Note that the faulting process may involve evicting existing objects 1674 * from the GTT and/or fence registers to make room. 
So performance may 1675 * suffer if the GTT working set is large or there are few fence registers 1676 * left. 1677 */ 1678 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1679 { 1680 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1681 struct drm_device *dev = obj->base.dev; 1682 drm_i915_private_t *dev_priv = dev->dev_private; 1683 pgoff_t page_offset; 1684 unsigned long pfn; 1685 int ret = 0; 1686 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1687 1688 /* We don't use vmf->pgoff since that has the fake offset */ 1689 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1690 PAGE_SHIFT; 1691 1692 ret = i915_mutex_lock_interruptible(dev); 1693 if (ret) 1694 goto out; 1695 1696 trace_i915_gem_object_fault(obj, page_offset, true, write); 1697 1698 /* Now bind it into the GTT if needed */ 1699 ret = i915_gem_object_pin(obj, 0, true, false); 1700 if (ret) 1701 goto unlock; 1702 1703 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1704 if (ret) 1705 goto unpin; 1706 1707 ret = i915_gem_object_get_fence(obj); 1708 if (ret) 1709 goto unpin; 1710 1711 obj->fault_mappable = true; 1712 1713 pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + 1714 page_offset; 1715 1716 /* Finally, remap it using the new GTT offset */ 1717 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1718 unpin: 1719 i915_gem_object_unpin(obj); 1720 unlock: 1721 mutex_unlock(&dev->struct_mutex); 1722 out: 1723 switch (ret) { 1724 case -EIO: 1725 /* If this -EIO is due to a gpu hang, give the reset code a 1726 * chance to clean up the mess. Otherwise return the proper 1727 * SIGBUS. */ 1728 if (!atomic_read(&dev_priv->mm.wedged)) 1729 return VM_FAULT_SIGBUS; 1730 case -EAGAIN: 1731 /* Give the error handler a chance to run and move the 1732 * objects off the GPU active list. Next time we service the 1733 * fault, we should be able to transition the page into the 1734 * GTT without touching the GPU (and so avoid further 1735 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1736 * with coherency, just lost writes. 1737 */ 1738 set_need_resched(); 1739 case 0: 1740 case -ERESTARTSYS: 1741 case -EINTR: 1742 case -EBUSY: 1743 /* 1744 * EBUSY is ok: this just means that another thread 1745 * already did the job. 1746 */ 1747 return VM_FAULT_NOPAGE; 1748 case -ENOMEM: 1749 return VM_FAULT_OOM; 1750 case -ENOSPC: 1751 return VM_FAULT_SIGBUS; 1752 default: 1753 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1754 return VM_FAULT_SIGBUS; 1755 } 1756 } 1757 #endif 1758 1759 /** 1760 * i915_gem_release_mmap - remove physical page mappings 1761 * @obj: obj in question 1762 * 1763 * Preserve the reservation of the mmapping with the DRM core code, but 1764 * relinquish ownership of the pages back to the system. 1765 * 1766 * It is vital that we remove the page mapping if we have mapped a tiled 1767 * object through the GTT and then lose the fence register due to 1768 * resource pressure. Similarly if the object has been moved out of the 1769 * aperture, than pages mapped into userspace must be revoked. Removing the 1770 * mapping will then trigger a page fault on the next user access, allowing 1771 * fixup by i915_gem_fault(). 
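 *
 * (The NetBSD variant below achieves the same effect by revoking the
 * mappings of each backing page with pmap_page_protect().)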
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	if (!obj->fault_mappable)
		return;

#ifdef __NetBSD__	/* XXX gem gtt fault */
	{
		struct vm_page *page;

		mutex_enter(obj->base.gemo_shm_uao->vmobjlock);
		KASSERT(obj->pages != NULL);
		/* Force a fresh fault for each page. */
		TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue)
			pmap_page_protect(page, VM_PROT_NONE);
		mutex_exit(obj->base.gemo_shm_uao->vmobjlock);
	}
#else
	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
		    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
		    obj->base.size, 1);
#endif

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-tile object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int ret;

	if (obj->base.map_list.map)
		return 0;

	dev_priv->mm.shrinker_no_lock_stealing = true;

	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects.
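	 * (i915_gem_purge() below is asked for at least as many pages as
	 * this object occupies, with i915_gem_shrink_all() as a last resort.)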
We can't randomly release 1893 * mmap_offsets as userspace expects them to be persistent for the 1894 * lifetime of the objects. The closest we can is to release the 1895 * offsets on purgeable objects by truncating it and marking it purged, 1896 * which prevents userspace from ever using that object again. 1897 */ 1898 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1899 ret = drm_gem_create_mmap_offset(&obj->base); 1900 if (ret != -ENOSPC) 1901 goto out; 1902 1903 i915_gem_shrink_all(dev_priv); 1904 ret = drm_gem_create_mmap_offset(&obj->base); 1905 out: 1906 dev_priv->mm.shrinker_no_lock_stealing = false; 1907 1908 return ret; 1909 } 1910 1911 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1912 { 1913 if (!obj->base.map_list.map) 1914 return; 1915 1916 drm_gem_free_mmap_offset(&obj->base); 1917 } 1918 1919 int 1920 i915_gem_mmap_gtt(struct drm_file *file, 1921 struct drm_device *dev, 1922 uint32_t handle, 1923 uint64_t *offset) 1924 { 1925 struct drm_i915_private *dev_priv = dev->dev_private; 1926 struct drm_i915_gem_object *obj; 1927 int ret; 1928 1929 ret = i915_mutex_lock_interruptible(dev); 1930 if (ret) 1931 return ret; 1932 1933 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1934 if (&obj->base == NULL) { 1935 ret = -ENOENT; 1936 goto unlock; 1937 } 1938 1939 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1940 ret = -E2BIG; 1941 goto out; 1942 } 1943 1944 if (obj->madv != I915_MADV_WILLNEED) { 1945 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1946 ret = -EINVAL; 1947 goto out; 1948 } 1949 1950 ret = i915_gem_object_create_mmap_offset(obj); 1951 if (ret) 1952 goto out; 1953 1954 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1955 1956 out: 1957 drm_gem_object_unreference(&obj->base); 1958 unlock: 1959 mutex_unlock(&dev->struct_mutex); 1960 return ret; 1961 } 1962 1963 /** 1964 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1965 * @dev: DRM device 1966 * @data: GTT mapping ioctl data 1967 * @file: GEM object info 1968 * 1969 * Simply returns the fake offset to userspace so it can mmap it. 1970 * The mmap call will end up in drm_gem_mmap(), which will set things 1971 * up so we can get faults in the handler above. 1972 * 1973 * The fault handler will take care of binding the object into the GTT 1974 * (since it may have been evicted to make room for something), allocating 1975 * a fence register, and mapping the appropriate aperture address into 1976 * userspace. 1977 */ 1978 int 1979 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1980 struct drm_file *file) 1981 { 1982 struct drm_i915_gem_mmap_gtt *args = data; 1983 1984 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1985 } 1986 1987 /* Immediately discard the backing storage */ 1988 static void 1989 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1990 { 1991 #ifndef __NetBSD__ 1992 struct inode *inode; 1993 #endif 1994 1995 i915_gem_object_free_mmap_offset(obj); 1996 1997 #ifdef __NetBSD__ 1998 { 1999 struct uvm_object *const uobj = obj->base.gemo_shm_uao; 2000 2001 if (uobj != NULL) { 2002 /* XXX Calling pgo_put like this is bogus. */ 2003 mutex_enter(uobj->vmobjlock); 2004 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size, 2005 (PGO_ALLPAGES | PGO_FREE)); 2006 } 2007 } 2008 #else 2009 if (obj->base.filp == NULL) 2010 return; 2011 2012 /* Our goal here is to return as much of the memory as 2013 * is possible back to the system as we are called from OOM. 
2014 * To do this we must instruct the shmfs to drop all of its 2015 * backing pages, *now*. 2016 */ 2017 inode = obj->base.filp->f_path.dentry->d_inode; 2018 shmem_truncate_range(inode, 0, (loff_t)-1); 2019 #endif 2020 2021 obj->madv = __I915_MADV_PURGED; 2022 } 2023 2024 static inline int 2025 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2026 { 2027 return obj->madv == I915_MADV_DONTNEED; 2028 } 2029 2030 #ifdef __NetBSD__ 2031 static void 2032 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2033 { 2034 struct drm_device *const dev = obj->base.dev; 2035 int ret; 2036 2037 /* XXX Cargo-culted from the Linux code. */ 2038 BUG_ON(obj->madv == __I915_MADV_PURGED); 2039 2040 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2041 if (ret) { 2042 WARN_ON(ret != -EIO); 2043 i915_gem_clflush_object(obj); 2044 obj->base.read_domains = obj->base.write_domain = 2045 I915_GEM_DOMAIN_CPU; 2046 } 2047 2048 if (i915_gem_object_needs_bit17_swizzle(obj)) 2049 i915_gem_object_save_bit_17_swizzle(obj); 2050 2051 /* XXX Maintain dirty flag? */ 2052 2053 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap); 2054 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2055 obj->base.size, obj->pages, obj->igo_nsegs); 2056 2057 kfree(obj->pages); 2058 } 2059 #else 2060 static void 2061 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2062 { 2063 int page_count = obj->base.size / PAGE_SIZE; 2064 struct scatterlist *sg; 2065 int ret, i; 2066 2067 BUG_ON(obj->madv == __I915_MADV_PURGED); 2068 2069 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2070 if (ret) { 2071 /* In the event of a disaster, abandon all caches and 2072 * hope for the best. 2073 */ 2074 WARN_ON(ret != -EIO); 2075 i915_gem_clflush_object(obj); 2076 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2077 } 2078 2079 if (i915_gem_object_needs_bit17_swizzle(obj)) 2080 i915_gem_object_save_bit_17_swizzle(obj); 2081 2082 if (obj->madv == I915_MADV_DONTNEED) 2083 obj->dirty = 0; 2084 2085 for_each_sg(obj->pages->sgl, sg, page_count, i) { 2086 struct page *page = sg_page(sg); 2087 2088 if (obj->dirty) 2089 set_page_dirty(page); 2090 2091 if (obj->madv == I915_MADV_WILLNEED) 2092 mark_page_accessed(page); 2093 2094 page_cache_release(page); 2095 } 2096 obj->dirty = 0; 2097 2098 sg_free_table(obj->pages); 2099 kfree(obj->pages); 2100 } 2101 #endif 2102 2103 static int 2104 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2105 { 2106 const struct drm_i915_gem_object_ops *ops = obj->ops; 2107 2108 if (obj->pages == NULL) 2109 return 0; 2110 2111 BUG_ON(obj->gtt_space); 2112 2113 if (obj->pages_pin_count) 2114 return -EBUSY; 2115 2116 /* ->put_pages might need to allocate memory for the bit17 swizzle 2117 * array, hence protect them from being reaped by removing them from gtt 2118 * lists early. 
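/*
 * Sketch of the purge lifecycle finished by i915_gem_object_truncate():
 * only DONTNEED objects are eligible, and once the backing store is gone
 * the object is permanently purged and never repopulated.  The ex_ enum
 * and struct are illustrative stand-ins for the real object fields.
 */
enum ex_madv { EX_MADV_WILLNEED, EX_MADV_DONTNEED, EX_MADV_PURGED };

struct ex_bo_state {
	enum ex_madv madv;
	bool has_backing_pages;
};

static bool
ex_is_purgeable(const struct ex_bo_state *bo)
{
	return bo->madv == EX_MADV_DONTNEED;
}

static void
ex_truncate(struct ex_bo_state *bo)
{
	bo->has_backing_pages = false;
	bo->madv = EX_MADV_PURGED;	/* one-way transition */
}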
*/ 2119 list_del(&obj->gtt_list); 2120 2121 ops->put_pages(obj); 2122 obj->pages = NULL; 2123 2124 if (i915_gem_object_is_purgeable(obj)) 2125 i915_gem_object_truncate(obj); 2126 2127 return 0; 2128 } 2129 2130 static long 2131 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 2132 bool purgeable_only) 2133 { 2134 struct drm_i915_gem_object *obj, *next; 2135 long count = 0; 2136 2137 list_for_each_entry_safe(obj, next, 2138 &dev_priv->mm.unbound_list, 2139 gtt_list) { 2140 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2141 i915_gem_object_put_pages(obj) == 0) { 2142 count += obj->base.size >> PAGE_SHIFT; 2143 if (count >= target) 2144 return count; 2145 } 2146 } 2147 2148 list_for_each_entry_safe(obj, next, 2149 &dev_priv->mm.inactive_list, 2150 mm_list) { 2151 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2152 i915_gem_object_unbind(obj) == 0 && 2153 i915_gem_object_put_pages(obj) == 0) { 2154 count += obj->base.size >> PAGE_SHIFT; 2155 if (count >= target) 2156 return count; 2157 } 2158 } 2159 2160 return count; 2161 } 2162 2163 static long 2164 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2165 { 2166 return __i915_gem_shrink(dev_priv, target, true); 2167 } 2168 2169 static void 2170 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2171 { 2172 struct drm_i915_gem_object *obj, *next; 2173 2174 i915_gem_evict_everything(dev_priv->dev); 2175 2176 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list) 2177 i915_gem_object_put_pages(obj); 2178 } 2179 2180 #ifdef __NetBSD__ 2181 static int 2182 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2183 { 2184 struct drm_device *const dev = obj->base.dev; 2185 struct vm_page *page; 2186 int error; 2187 2188 /* XXX Cargo-culted from the Linux code. */ 2189 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2190 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2191 2192 KASSERT(obj->pages == NULL); 2193 TAILQ_INIT(&obj->igo_pageq); 2194 obj->pages = kcalloc((obj->base.size / PAGE_SIZE), 2195 sizeof(obj->pages[0]), GFP_KERNEL); 2196 if (obj->pages == NULL) { 2197 error = -ENOMEM; 2198 goto fail0; 2199 } 2200 2201 /* XXX errno NetBSD->Linux */ 2202 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 2203 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages, 2204 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT); 2205 if (error) 2206 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */ 2207 goto fail1; 2208 KASSERT(0 < obj->igo_nsegs); 2209 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE)); 2210 2211 /* 2212 * Check that the paddrs will fit in 40 bits, or 32 bits on i965. 2213 * 2214 * XXX This is wrong; we ought to pass this constraint to 2215 * bus_dmamem_wire_uvm_object instead. 2216 */ 2217 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) { 2218 const uint64_t mask = 2219 (IS_BROADWATER(dev) || IS_CRESTLINE(dev)? 2220 0xffffffffULL : 0xffffffffffULL); 2221 if (VM_PAGE_TO_PHYS(page) & ~mask) { 2222 DRM_ERROR("GEM physical address exceeds %u bits" 2223 ": %"PRIxMAX"\n", 2224 popcount64(mask), 2225 (uintmax_t)VM_PAGE_TO_PHYS(page)); 2226 error = -EIO; 2227 goto fail2; 2228 } 2229 } 2230 2231 /* XXX errno NetBSD->Linux */ 2232 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs, 2233 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap); 2234 if (error) 2235 goto fail2; 2236 2237 /* XXX Cargo-culted from the Linux code. 
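/*
 * Sketch of the physical-address check above: the i965-class parts named
 * there (Broadwater, Crestline) can only take 32-bit physical addresses
 * for GEM pages, everything else handled here 40 bits, so any page outside
 * the mask must be rejected (or, as the XXX notes, the constraint passed
 * to the DMA allocator in the first place).  The helper name is
 * illustrative.
 */
static bool
ex_gem_paddr_ok(u64 paddr, bool only_32bit_dma)
{
	const u64 mask = only_32bit_dma ? 0xffffffffULL	/* 4GiB */
					: 0xffffffffffULL;	/* 1TiB */

	return (paddr & ~mask) == 0;
}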
*/ 2238 if (i915_gem_object_needs_bit17_swizzle(obj)) 2239 i915_gem_object_do_bit_17_swizzle(obj); 2240 2241 /* Success! */ 2242 return 0; 2243 2244 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2245 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE)); 2246 fail1: kfree(obj->pages); 2247 obj->pages = NULL; 2248 fail0: KASSERT(error); 2249 return error; 2250 } 2251 #else 2252 static int 2253 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2254 { 2255 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2256 int page_count, i; 2257 struct address_space *mapping; 2258 struct sg_table *st; 2259 struct scatterlist *sg; 2260 struct page *page; 2261 gfp_t gfp; 2262 2263 /* Assert that the object is not currently in any GPU domain. As it 2264 * wasn't in the GTT, there shouldn't be any way it could have been in 2265 * a GPU cache 2266 */ 2267 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2268 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2269 2270 st = kmalloc(sizeof(*st), GFP_KERNEL); 2271 if (st == NULL) 2272 return -ENOMEM; 2273 2274 page_count = obj->base.size / PAGE_SIZE; 2275 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2276 sg_free_table(st); 2277 kfree(st); 2278 return -ENOMEM; 2279 } 2280 2281 /* Get the list of pages out of our struct file. They'll be pinned 2282 * at this point until we release them. 2283 * 2284 * Fail silently without starting the shrinker 2285 */ 2286 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 2287 gfp = mapping_gfp_mask(mapping); 2288 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2289 gfp &= ~(__GFP_IO | __GFP_WAIT); 2290 for_each_sg(st->sgl, sg, page_count, i) { 2291 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2292 if (IS_ERR(page)) { 2293 i915_gem_purge(dev_priv, page_count); 2294 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2295 } 2296 if (IS_ERR(page)) { 2297 /* We've tried hard to allocate the memory by reaping 2298 * our own buffer, now let the real VM do its job and 2299 * go down in flames if truly OOM. 2300 */ 2301 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 2302 gfp |= __GFP_IO | __GFP_WAIT; 2303 2304 i915_gem_shrink_all(dev_priv); 2305 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2306 if (IS_ERR(page)) 2307 goto err_pages; 2308 2309 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2310 gfp &= ~(__GFP_IO | __GFP_WAIT); 2311 } 2312 2313 sg_set_page(sg, page, PAGE_SIZE, 0); 2314 } 2315 2316 obj->pages = st; 2317 2318 if (i915_gem_object_needs_bit17_swizzle(obj)) 2319 i915_gem_object_do_bit_17_swizzle(obj); 2320 2321 return 0; 2322 2323 err_pages: 2324 for_each_sg(st->sgl, sg, i, page_count) 2325 page_cache_release(sg_page(sg)); 2326 sg_free_table(st); 2327 kfree(st); 2328 return PTR_ERR(page); 2329 } 2330 #endif 2331 2332 /* Ensure that the associated pages are gathered from the backing storage 2333 * and pinned into our object. i915_gem_object_get_pages() may be called 2334 * multiple times before they are released by a single call to 2335 * i915_gem_object_put_pages() - once the pages are no longer referenced 2336 * either as a result of memory pressure (reaping pages under the shrinker) 2337 * or as the object is itself released. 
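/*
 * Sketch of the page-lifetime rule just described (illustrative struct and
 * callbacks, not the driver's): get_pages is idempotent and may be called
 * many times, put_pages releases the pages exactly once, and a non-zero
 * pin count forbids releasing them at all.
 */
struct ex_pages_state {
	void *pages;		/* non-NULL once gathered from backing store */
	int pages_pin_count;	/* holders that forbid releasing the pages */
};

static int
ex_get_pages(struct ex_pages_state *s, void *(*gather)(void))
{
	if (s->pages != NULL)
		return 0;	/* already gathered; call is a no-op */
	s->pages = gather();
	return (s->pages != NULL) ? 0 : -ENOMEM;
}

static int
ex_put_pages(struct ex_pages_state *s, void (*release)(void *))
{
	if (s->pages == NULL)
		return 0;
	if (s->pages_pin_count != 0)
		return -EBUSY;	/* someone still relies on the pages */
	release(s->pages);
	s->pages = NULL;
	return 0;
}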
2338 */ 2339 int 2340 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2341 { 2342 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2343 const struct drm_i915_gem_object_ops *ops = obj->ops; 2344 int ret; 2345 2346 if (obj->pages) 2347 return 0; 2348 2349 BUG_ON(obj->pages_pin_count); 2350 2351 ret = ops->get_pages(obj); 2352 if (ret) 2353 return ret; 2354 2355 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2356 return 0; 2357 } 2358 2359 void 2360 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2361 struct intel_ring_buffer *ring) 2362 { 2363 struct drm_device *dev = obj->base.dev; 2364 struct drm_i915_private *dev_priv = dev->dev_private; 2365 u32 seqno = intel_ring_get_seqno(ring); 2366 2367 BUG_ON(ring == NULL); 2368 obj->ring = ring; 2369 2370 /* Add a reference if we're newly entering the active list. */ 2371 if (!obj->active) { 2372 drm_gem_object_reference(&obj->base); 2373 obj->active = 1; 2374 } 2375 2376 /* Move from whatever list we were on to the tail of execution. */ 2377 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 2378 list_move_tail(&obj->ring_list, &ring->active_list); 2379 2380 obj->last_read_seqno = seqno; 2381 2382 if (obj->fenced_gpu_access) { 2383 obj->last_fenced_seqno = seqno; 2384 2385 /* Bump MRU to take account of the delayed flush */ 2386 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2387 struct drm_i915_fence_reg *reg; 2388 2389 reg = &dev_priv->fence_regs[obj->fence_reg]; 2390 list_move_tail(®->lru_list, 2391 &dev_priv->mm.fence_list); 2392 } 2393 } 2394 } 2395 2396 static void 2397 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2398 { 2399 struct drm_device *dev = obj->base.dev; 2400 struct drm_i915_private *dev_priv = dev->dev_private; 2401 2402 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2403 BUG_ON(!obj->active); 2404 2405 if (obj->pin_count) /* are we a framebuffer? */ 2406 intel_mark_fb_idle(obj); 2407 2408 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2409 2410 list_del_init(&obj->ring_list); 2411 obj->ring = NULL; 2412 2413 obj->last_read_seqno = 0; 2414 obj->last_write_seqno = 0; 2415 obj->base.write_domain = 0; 2416 2417 obj->last_fenced_seqno = 0; 2418 obj->fenced_gpu_access = false; 2419 2420 obj->active = 0; 2421 drm_gem_object_unreference(&obj->base); 2422 2423 WARN_ON(i915_verify_lists(dev)); 2424 } 2425 2426 static int 2427 i915_gem_handle_seqno_wrap(struct drm_device *dev) 2428 { 2429 struct drm_i915_private *dev_priv = dev->dev_private; 2430 struct intel_ring_buffer *ring; 2431 int ret, i, j; 2432 2433 /* The hardware uses various monotonic 32-bit counters, if we 2434 * detect that they will wraparound we need to idle the GPU 2435 * and reset those counters. 
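/*
 * The 32-bit seqnos above wrap, so ordering tests must use modular
 * arithmetic.  A sketch of the comparison the driver relies on elsewhere
 * (i915_seqno_passed): interpret the unsigned difference as signed, which
 * stays correct across the wrap as long as the two values are within 2^31
 * of each other.
 */
static bool
ex_seqno_passed(u32 seq1, u32 seq2)
{
	return (s32)(seq1 - seq2) >= 0;
}
/* e.g. ex_seqno_passed(2, 0xfffffff0) is true: 2 comes after the wrap. */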
2436 */ 2437 ret = 0; 2438 for_each_ring(ring, dev_priv, i) { 2439 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2440 ret |= ring->sync_seqno[j] != 0; 2441 } 2442 if (ret == 0) 2443 return ret; 2444 2445 ret = i915_gpu_idle(dev); 2446 if (ret) 2447 return ret; 2448 2449 i915_gem_retire_requests(dev); 2450 for_each_ring(ring, dev_priv, i) { 2451 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2452 ring->sync_seqno[j] = 0; 2453 } 2454 2455 return 0; 2456 } 2457 2458 int 2459 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2460 { 2461 struct drm_i915_private *dev_priv = dev->dev_private; 2462 2463 /* reserve 0 for non-seqno */ 2464 if (dev_priv->next_seqno == 0) { 2465 int ret = i915_gem_handle_seqno_wrap(dev); 2466 if (ret) 2467 return ret; 2468 2469 dev_priv->next_seqno = 1; 2470 } 2471 2472 *seqno = dev_priv->next_seqno++; 2473 return 0; 2474 } 2475 2476 int 2477 i915_add_request(struct intel_ring_buffer *ring, 2478 struct drm_file *file, 2479 u32 *out_seqno) 2480 { 2481 drm_i915_private_t *dev_priv = ring->dev->dev_private; 2482 struct drm_i915_gem_request *request; 2483 u32 request_ring_position; 2484 int was_empty; 2485 int ret; 2486 2487 /* 2488 * Emit any outstanding flushes - execbuf can fail to emit the flush 2489 * after having emitted the batchbuffer command. Hence we need to fix 2490 * things up similar to emitting the lazy request. The difference here 2491 * is that the flush _must_ happen before the next request, no matter 2492 * what. 2493 */ 2494 ret = intel_ring_flush_all_caches(ring); 2495 if (ret) 2496 return ret; 2497 2498 request = kmalloc(sizeof(*request), GFP_KERNEL); 2499 if (request == NULL) 2500 return -ENOMEM; 2501 2502 2503 /* Record the position of the start of the request so that 2504 * should we detect the updated seqno part-way through the 2505 * GPU processing the request, we never over-estimate the 2506 * position of the head. 
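/*
 * Sketch of the seqno allocation rule above (placeholder callback, not the
 * driver's wrap handler): 0 is reserved to mean "no seqno", so when the
 * counter wraps back to 0 the GPU is idled and the per-ring sync caches
 * cleared before numbering restarts at 1.
 */
static int
ex_next_seqno(u32 *counter, u32 *out, int (*handle_wrap)(void))
{
	if (*counter == 0) {		/* wrapped (or first use) */
		int ret = handle_wrap();
		if (ret)
			return ret;
		*counter = 1;
	}
	*out = (*counter)++;
	return 0;
}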
2507 */ 2508 request_ring_position = intel_ring_get_tail(ring); 2509 2510 ret = ring->add_request(ring); 2511 if (ret) { 2512 kfree(request); 2513 return ret; 2514 } 2515 2516 request->seqno = intel_ring_get_seqno(ring); 2517 request->ring = ring; 2518 request->tail = request_ring_position; 2519 request->emitted_jiffies = jiffies; 2520 was_empty = list_empty(&ring->request_list); 2521 list_add_tail(&request->list, &ring->request_list); 2522 request->file_priv = NULL; 2523 2524 if (file) { 2525 struct drm_i915_file_private *file_priv = file->driver_priv; 2526 2527 spin_lock(&file_priv->mm.lock); 2528 request->file_priv = file_priv; 2529 list_add_tail(&request->client_list, 2530 &file_priv->mm.request_list); 2531 spin_unlock(&file_priv->mm.lock); 2532 } 2533 2534 trace_i915_gem_request_add(ring, request->seqno); 2535 ring->outstanding_lazy_request = 0; 2536 2537 if (!dev_priv->mm.suspended) { 2538 if (i915_enable_hangcheck) { 2539 mod_timer(&dev_priv->hangcheck_timer, 2540 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); 2541 } 2542 if (was_empty) { 2543 queue_delayed_work(dev_priv->wq, 2544 &dev_priv->mm.retire_work, 2545 round_jiffies_up_relative(HZ)); 2546 intel_mark_busy(dev_priv->dev); 2547 } 2548 } 2549 2550 if (out_seqno) 2551 *out_seqno = request->seqno; 2552 return 0; 2553 } 2554 2555 static inline void 2556 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2557 { 2558 struct drm_i915_file_private *file_priv = request->file_priv; 2559 2560 if (!file_priv) 2561 return; 2562 2563 spin_lock(&file_priv->mm.lock); 2564 if (request->file_priv) { 2565 list_del(&request->client_list); 2566 request->file_priv = NULL; 2567 } 2568 spin_unlock(&file_priv->mm.lock); 2569 } 2570 2571 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 2572 struct intel_ring_buffer *ring) 2573 { 2574 while (!list_empty(&ring->request_list)) { 2575 struct drm_i915_gem_request *request; 2576 2577 request = list_first_entry(&ring->request_list, 2578 struct drm_i915_gem_request, 2579 list); 2580 2581 list_del(&request->list); 2582 i915_gem_request_remove_from_client(request); 2583 kfree(request); 2584 } 2585 2586 while (!list_empty(&ring->active_list)) { 2587 struct drm_i915_gem_object *obj; 2588 2589 obj = list_first_entry(&ring->active_list, 2590 struct drm_i915_gem_object, 2591 ring_list); 2592 2593 i915_gem_object_move_to_inactive(obj); 2594 } 2595 } 2596 2597 static void i915_gem_reset_fences(struct drm_device *dev) 2598 { 2599 struct drm_i915_private *dev_priv = dev->dev_private; 2600 int i; 2601 2602 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2603 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2604 2605 i915_gem_write_fence(dev, i, NULL); 2606 2607 if (reg->obj) 2608 i915_gem_object_fence_lost(reg->obj); 2609 2610 reg->pin_count = 0; 2611 reg->obj = NULL; 2612 INIT_LIST_HEAD(®->lru_list); 2613 } 2614 2615 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 2616 } 2617 2618 void i915_gem_reset(struct drm_device *dev) 2619 { 2620 struct drm_i915_private *dev_priv = dev->dev_private; 2621 struct drm_i915_gem_object *obj; 2622 struct intel_ring_buffer *ring; 2623 int i; 2624 2625 for_each_ring(ring, dev_priv, i) 2626 i915_gem_reset_ring_lists(dev_priv, ring); 2627 2628 /* Move everything out of the GPU domains to ensure we do any 2629 * necessary invalidation upon reuse. 
2630 */ 2631 list_for_each_entry(obj, 2632 &dev_priv->mm.inactive_list, 2633 mm_list) 2634 { 2635 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2636 } 2637 2638 /* The fence registers are invalidated so clear them out */ 2639 i915_gem_reset_fences(dev); 2640 } 2641 2642 /** 2643 * This function clears the request list as sequence numbers are passed. 2644 */ 2645 void 2646 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2647 { 2648 uint32_t seqno; 2649 2650 if (list_empty(&ring->request_list)) 2651 return; 2652 2653 WARN_ON(i915_verify_lists(ring->dev)); 2654 2655 seqno = ring->get_seqno(ring, true); 2656 2657 while (!list_empty(&ring->request_list)) { 2658 struct drm_i915_gem_request *request; 2659 2660 request = list_first_entry(&ring->request_list, 2661 struct drm_i915_gem_request, 2662 list); 2663 2664 if (!i915_seqno_passed(seqno, request->seqno)) 2665 break; 2666 2667 trace_i915_gem_request_retire(ring, request->seqno); 2668 /* We know the GPU must have read the request to have 2669 * sent us the seqno + interrupt, so use the position 2670 * of tail of the request to update the last known position 2671 * of the GPU head. 2672 */ 2673 ring->last_retired_head = request->tail; 2674 2675 list_del(&request->list); 2676 i915_gem_request_remove_from_client(request); 2677 kfree(request); 2678 } 2679 2680 /* Move any buffers on the active list that are no longer referenced 2681 * by the ringbuffer to the flushing/inactive lists as appropriate. 2682 */ 2683 while (!list_empty(&ring->active_list)) { 2684 struct drm_i915_gem_object *obj; 2685 2686 obj = list_first_entry(&ring->active_list, 2687 struct drm_i915_gem_object, 2688 ring_list); 2689 2690 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2691 break; 2692 2693 i915_gem_object_move_to_inactive(obj); 2694 } 2695 2696 if (unlikely(ring->trace_irq_seqno && 2697 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2698 ring->irq_put(ring); 2699 ring->trace_irq_seqno = 0; 2700 } 2701 2702 WARN_ON(i915_verify_lists(ring->dev)); 2703 } 2704 2705 void 2706 i915_gem_retire_requests(struct drm_device *dev) 2707 { 2708 drm_i915_private_t *dev_priv = dev->dev_private; 2709 struct intel_ring_buffer *ring; 2710 int i; 2711 2712 for_each_ring(ring, dev_priv, i) 2713 i915_gem_retire_requests_ring(ring); 2714 } 2715 2716 static void 2717 i915_gem_retire_work_handler(struct work_struct *work) 2718 { 2719 drm_i915_private_t *dev_priv; 2720 struct drm_device *dev; 2721 struct intel_ring_buffer *ring; 2722 bool idle; 2723 int i; 2724 2725 dev_priv = container_of(work, drm_i915_private_t, 2726 mm.retire_work.work); 2727 dev = dev_priv->dev; 2728 2729 /* Come back later if the device is busy... */ 2730 if (!mutex_trylock(&dev->struct_mutex)) { 2731 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2732 round_jiffies_up_relative(HZ)); 2733 return; 2734 } 2735 2736 i915_gem_retire_requests(dev); 2737 2738 /* Send a periodic flush down the ring so we don't hold onto GEM 2739 * objects indefinitely. 
 */
	idle = true;
	for_each_ring(ring, dev_priv, i) {
		if (ring->gpu_caches_dirty)
			i915_add_request(ring, NULL, NULL);

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
				   round_jiffies_up_relative(HZ));
	if (idle)
		intel_mark_idle(dev);

	mutex_unlock(&dev->struct_mutex);
}

/**
 * Ensures that an object will eventually get non-busy by flushing any required
 * write domains, emitting any outstanding lazy request and retiring any
 * completed requests.
 */
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->active) {
		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
		if (ret)
			return ret;

		i915_gem_retire_requests_ring(obj->ring);
	}

	return 0;
}

/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
 * @DRM_IOCTL_ARGS: standard ioctl arguments
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENOENT: object doesn't exist
 * Also possible, but rare:
 *  -EAGAIN: GPU wedged
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter race condition exists in the
 * busy ioctl.
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
	struct intel_ring_buffer *ring = NULL;
	struct timespec timeout_stack, *timeout = NULL;
	u32 seqno = 0;
	int ret = 0;

	if (args->timeout_ns >= 0) {
		timeout_stack = ns_to_timespec(args->timeout_ns);
		timeout = &timeout_stack;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
	if (&obj->base == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOENT;
	}

	/* Need to make sure the object gets inactive eventually.
*/ 2827 ret = i915_gem_object_flush_active(obj); 2828 if (ret) 2829 goto out; 2830 2831 if (obj->active) { 2832 seqno = obj->last_read_seqno; 2833 ring = obj->ring; 2834 } 2835 2836 if (seqno == 0) 2837 goto out; 2838 2839 /* Do this after OLR check to make sure we make forward progress polling 2840 * on this IOCTL with a 0 timeout (like busy ioctl) 2841 */ 2842 if (!args->timeout_ns) { 2843 ret = -ETIME; 2844 goto out; 2845 } 2846 2847 drm_gem_object_unreference(&obj->base); 2848 mutex_unlock(&dev->struct_mutex); 2849 2850 ret = __wait_seqno(ring, seqno, true, timeout); 2851 if (timeout) { 2852 WARN_ON(!timespec_valid(timeout)); 2853 args->timeout_ns = timespec_to_ns(timeout); 2854 } 2855 return ret; 2856 2857 out: 2858 drm_gem_object_unreference(&obj->base); 2859 mutex_unlock(&dev->struct_mutex); 2860 return ret; 2861 } 2862 2863 /** 2864 * i915_gem_object_sync - sync an object to a ring. 2865 * 2866 * @obj: object which may be in use on another ring. 2867 * @to: ring we wish to use the object on. May be NULL. 2868 * 2869 * This code is meant to abstract object synchronization with the GPU. 2870 * Calling with NULL implies synchronizing the object with the CPU 2871 * rather than a particular GPU ring. 2872 * 2873 * Returns 0 if successful, else propagates up the lower layer error. 2874 */ 2875 int 2876 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2877 struct intel_ring_buffer *to) 2878 { 2879 struct intel_ring_buffer *from = obj->ring; 2880 u32 seqno; 2881 int ret, idx; 2882 2883 if (from == NULL || to == from) 2884 return 0; 2885 2886 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2887 return i915_gem_object_wait_rendering(obj, false); 2888 2889 idx = intel_ring_sync_index(from, to); 2890 2891 seqno = obj->last_read_seqno; 2892 if (seqno <= from->sync_seqno[idx]) 2893 return 0; 2894 2895 ret = i915_gem_check_olr(obj->ring, seqno); 2896 if (ret) 2897 return ret; 2898 2899 ret = to->sync_to(to, from, seqno); 2900 if (!ret) 2901 /* We use last_read_seqno because sync_to() 2902 * might have just caused seqno wrap under 2903 * the radar. 2904 */ 2905 from->sync_seqno[idx] = obj->last_read_seqno; 2906 2907 return ret; 2908 } 2909 2910 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2911 { 2912 u32 old_write_domain, old_read_domains; 2913 2914 /* Act a barrier for all accesses through the GTT */ 2915 mb(); 2916 2917 /* Force a pagefault for domain tracking on next user access */ 2918 i915_gem_release_mmap(obj); 2919 2920 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2921 return; 2922 2923 old_read_domains = obj->base.read_domains; 2924 old_write_domain = obj->base.write_domain; 2925 2926 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2927 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2928 2929 trace_i915_gem_object_change_domain(obj, 2930 old_read_domains, 2931 old_write_domain); 2932 } 2933 2934 /** 2935 * Unbinds an object from the GTT aperture. 2936 */ 2937 int 2938 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2939 { 2940 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2941 int ret = 0; 2942 2943 if (obj->gtt_space == NULL) 2944 return 0; 2945 2946 if (obj->pin_count) 2947 return -EBUSY; 2948 2949 BUG_ON(obj->pages == NULL); 2950 2951 ret = i915_gem_object_finish_gpu(obj); 2952 if (ret) 2953 return ret; 2954 /* Continue on if we fail due to EIO, the GPU is hung so we 2955 * should be safe and we need to cleanup or else we might 2956 * cause memory corruption through use-after-free. 
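/*
 * Sketch of the inter-ring synchronisation filter in i915_gem_object_sync()
 * above (emit_wait and the cached-seqno pointer are placeholders): a
 * semaphore wait is only emitted if the target ring has not already waited
 * for an equal-or-newer seqno from the source ring, and a successful wait
 * is recorded so later calls can be skipped.
 */
static int
ex_sync_to_ring(u32 *cached_sync_seqno, u32 obj_seqno,
		int (*emit_wait)(u32))
{
	int ret;

	if (obj_seqno <= *cached_sync_seqno)
		return 0;		/* already ordered, nothing to emit */

	ret = emit_wait(obj_seqno);
	if (ret == 0)
		*cached_sync_seqno = obj_seqno;	/* remember the newest wait */
	return ret;
}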
2957 */ 2958 2959 i915_gem_object_finish_gtt(obj); 2960 2961 /* release the fence reg _after_ flushing */ 2962 ret = i915_gem_object_put_fence(obj); 2963 if (ret) 2964 return ret; 2965 2966 trace_i915_gem_object_unbind(obj); 2967 2968 if (obj->has_global_gtt_mapping) 2969 i915_gem_gtt_unbind_object(obj); 2970 if (obj->has_aliasing_ppgtt_mapping) { 2971 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2972 obj->has_aliasing_ppgtt_mapping = 0; 2973 } 2974 i915_gem_gtt_finish_object(obj); 2975 2976 list_del(&obj->mm_list); 2977 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2978 /* Avoid an unnecessary call to unbind on rebind. */ 2979 obj->map_and_fenceable = true; 2980 2981 drm_mm_put_block(obj->gtt_space); 2982 obj->gtt_space = NULL; 2983 obj->gtt_offset = 0; 2984 2985 return 0; 2986 } 2987 2988 int i915_gpu_idle(struct drm_device *dev) 2989 { 2990 drm_i915_private_t *dev_priv = dev->dev_private; 2991 struct intel_ring_buffer *ring; 2992 int ret, i; 2993 2994 /* Flush everything onto the inactive list. */ 2995 for_each_ring(ring, dev_priv, i) { 2996 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2997 if (ret) 2998 return ret; 2999 3000 ret = intel_ring_idle(ring); 3001 if (ret) 3002 return ret; 3003 } 3004 3005 return 0; 3006 } 3007 3008 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, 3009 struct drm_i915_gem_object *obj) 3010 { 3011 drm_i915_private_t *dev_priv = dev->dev_private; 3012 uint64_t val; 3013 3014 if (obj) { 3015 u32 size = obj->gtt_space->size; 3016 3017 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3018 0xfffff000) << 32; 3019 val |= obj->gtt_offset & 0xfffff000; 3020 val |= (uint64_t)((obj->stride / 128) - 1) << 3021 SANDYBRIDGE_FENCE_PITCH_SHIFT; 3022 3023 if (obj->tiling_mode == I915_TILING_Y) 3024 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3025 val |= I965_FENCE_REG_VALID; 3026 } else 3027 val = 0; 3028 3029 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); 3030 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); 3031 } 3032 3033 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3034 struct drm_i915_gem_object *obj) 3035 { 3036 drm_i915_private_t *dev_priv = dev->dev_private; 3037 uint64_t val; 3038 3039 if (obj) { 3040 u32 size = obj->gtt_space->size; 3041 3042 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3043 0xfffff000) << 32; 3044 val |= obj->gtt_offset & 0xfffff000; 3045 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 3046 if (obj->tiling_mode == I915_TILING_Y) 3047 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3048 val |= I965_FENCE_REG_VALID; 3049 } else 3050 val = 0; 3051 3052 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); 3053 POSTING_READ(FENCE_REG_965_0 + reg * 8); 3054 } 3055 3056 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3057 struct drm_i915_gem_object *obj) 3058 { 3059 drm_i915_private_t *dev_priv = dev->dev_private; 3060 u32 val; 3061 3062 if (obj) { 3063 u32 size = obj->gtt_space->size; 3064 int pitch_val; 3065 int tile_width; 3066 3067 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 3068 (size & -size) != size || 3069 (obj->gtt_offset & (size - 1)), 3070 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3071 obj->gtt_offset, obj->map_and_fenceable, size); 3072 3073 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3074 tile_width = 128; 3075 else 3076 tile_width = 512; 3077 3078 /* Note: pitch better be a power of two tile widths */ 3079 pitch_val = obj->stride / tile_width; 3080 pitch_val = ffs(pitch_val) - 1; 3081 3082 val = obj->gtt_offset; 3083 if (obj->tiling_mode == I915_TILING_Y) 3084 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3085 val |= I915_FENCE_SIZE_BITS(size); 3086 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3087 val |= I830_FENCE_REG_VALID; 3088 } else 3089 val = 0; 3090 3091 if (reg < 8) 3092 reg = FENCE_REG_830_0 + reg * 4; 3093 else 3094 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3095 3096 I915_WRITE(reg, val); 3097 POSTING_READ(reg); 3098 } 3099 3100 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3101 struct drm_i915_gem_object *obj) 3102 { 3103 drm_i915_private_t *dev_priv = dev->dev_private; 3104 uint32_t val; 3105 3106 if (obj) { 3107 u32 size = obj->gtt_space->size; 3108 uint32_t pitch_val; 3109 3110 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 3111 (size & -size) != size || 3112 (obj->gtt_offset & (size - 1)), 3113 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 3114 obj->gtt_offset, size); 3115 3116 pitch_val = obj->stride / 128; 3117 pitch_val = ffs(pitch_val) - 1; 3118 3119 val = obj->gtt_offset; 3120 if (obj->tiling_mode == I915_TILING_Y) 3121 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3122 val |= I830_FENCE_SIZE_BITS(size); 3123 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3124 val |= I830_FENCE_REG_VALID; 3125 } else 3126 val = 0; 3127 3128 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3129 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3130 } 3131 3132 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3133 struct drm_i915_gem_object *obj) 3134 { 3135 switch (INTEL_INFO(dev)->gen) { 3136 case 7: 3137 case 6: sandybridge_write_fence_reg(dev, reg, obj); break; 3138 case 5: 3139 case 4: i965_write_fence_reg(dev, reg, obj); break; 3140 case 3: i915_write_fence_reg(dev, reg, obj); break; 3141 case 2: i830_write_fence_reg(dev, reg, obj); break; 3142 default: break; 3143 } 3144 } 3145 3146 static inline int fence_number(struct drm_i915_private *dev_priv, 3147 struct drm_i915_fence_reg *fence) 3148 { 3149 return fence - dev_priv->fence_regs; 3150 } 3151 3152 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3153 struct drm_i915_fence_reg *fence, 3154 bool enable) 3155 { 3156 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3157 int reg = fence_number(dev_priv, fence); 3158 3159 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3160 3161 if (enable) { 3162 obj->fence_reg = reg; 3163 fence->obj = obj; 3164 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3165 } else { 3166 obj->fence_reg = I915_FENCE_REG_NONE; 3167 fence->obj = NULL; 3168 list_del_init(&fence->lru_list); 3169 } 3170 } 3171 3172 static int 3173 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 3174 { 3175 if (obj->last_fenced_seqno) { 3176 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3177 if (ret) 3178 return ret; 3179 3180 obj->last_fenced_seqno = 0; 3181 } 3182 3183 /* Ensure that all CPU reads are completed before installing a fence 3184 * and all writes before removing the fence. 
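/*
 * The pre-965 fence registers above store the pitch as ffs(stride /
 * tile_width) - 1, i.e. log2 of the stride expressed in tile widths, which
 * is why the stride must be a power-of-two number of tiles.  Illustrative
 * sketch of that encoding (not a register write):
 */
static int
ex_fence_pitch_bits(u32 stride, u32 tile_width)
{
	u32 tiles = stride / tile_width;
	int log2 = 0;

	if (tiles == 0 || (tiles & (tiles - 1)) != 0)
		return -1;		/* not a power-of-two number of tiles */

	while (tiles >>= 1)
		log2++;
	return log2;			/* what ffs(tiles) - 1 computes */
}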
3185 */ 3186 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 3187 mb(); 3188 3189 obj->fenced_gpu_access = false; 3190 return 0; 3191 } 3192 3193 int 3194 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3195 { 3196 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3197 int ret; 3198 3199 ret = i915_gem_object_flush_fence(obj); 3200 if (ret) 3201 return ret; 3202 3203 if (obj->fence_reg == I915_FENCE_REG_NONE) 3204 return 0; 3205 3206 i915_gem_object_update_fence(obj, 3207 &dev_priv->fence_regs[obj->fence_reg], 3208 false); 3209 i915_gem_object_fence_lost(obj); 3210 3211 return 0; 3212 } 3213 3214 static struct drm_i915_fence_reg * 3215 i915_find_fence_reg(struct drm_device *dev) 3216 { 3217 struct drm_i915_private *dev_priv = dev->dev_private; 3218 struct drm_i915_fence_reg *reg, *avail; 3219 int i; 3220 3221 /* First try to find a free reg */ 3222 avail = NULL; 3223 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3224 reg = &dev_priv->fence_regs[i]; 3225 if (!reg->obj) 3226 return reg; 3227 3228 if (!reg->pin_count) 3229 avail = reg; 3230 } 3231 3232 if (avail == NULL) 3233 return NULL; 3234 3235 /* None available, try to steal one or wait for a user to finish */ 3236 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3237 if (reg->pin_count) 3238 continue; 3239 3240 return reg; 3241 } 3242 3243 return NULL; 3244 } 3245 3246 /** 3247 * i915_gem_object_get_fence - set up fencing for an object 3248 * @obj: object to map through a fence reg 3249 * 3250 * When mapping objects through the GTT, userspace wants to be able to write 3251 * to them without having to worry about swizzling if the object is tiled. 3252 * This function walks the fence regs looking for a free one for @obj, 3253 * stealing one if it can't find any. 3254 * 3255 * It then sets up the reg based on the object's properties: address, pitch 3256 * and tiling format. 3257 * 3258 * For an untiled surface, this removes any existing fence. 3259 */ 3260 int 3261 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3262 { 3263 struct drm_device *dev = obj->base.dev; 3264 struct drm_i915_private *dev_priv = dev->dev_private; 3265 bool enable = obj->tiling_mode != I915_TILING_NONE; 3266 struct drm_i915_fence_reg *reg; 3267 int ret; 3268 3269 /* Have we updated the tiling parameters upon the object and so 3270 * will need to serialise the write to the associated fence register? 3271 */ 3272 if (obj->fence_dirty) { 3273 ret = i915_gem_object_flush_fence(obj); 3274 if (ret) 3275 return ret; 3276 } 3277 3278 /* Just update our place in the LRU if our fence is getting reused. 
*/ 3279 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3280 reg = &dev_priv->fence_regs[obj->fence_reg]; 3281 if (!obj->fence_dirty) { 3282 list_move_tail(®->lru_list, 3283 &dev_priv->mm.fence_list); 3284 return 0; 3285 } 3286 } else if (enable) { 3287 reg = i915_find_fence_reg(dev); 3288 if (reg == NULL) 3289 return -EDEADLK; 3290 3291 if (reg->obj) { 3292 struct drm_i915_gem_object *old = reg->obj; 3293 3294 ret = i915_gem_object_flush_fence(old); 3295 if (ret) 3296 return ret; 3297 3298 i915_gem_object_fence_lost(old); 3299 } 3300 } else 3301 return 0; 3302 3303 i915_gem_object_update_fence(obj, reg, enable); 3304 obj->fence_dirty = false; 3305 3306 return 0; 3307 } 3308 3309 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3310 struct drm_mm_node *gtt_space, 3311 unsigned long cache_level) 3312 { 3313 struct drm_mm_node *other; 3314 3315 /* On non-LLC machines we have to be careful when putting differing 3316 * types of snoopable memory together to avoid the prefetcher 3317 * crossing memory domains and dieing. 3318 */ 3319 if (HAS_LLC(dev)) 3320 return true; 3321 3322 if (gtt_space == NULL) 3323 return true; 3324 3325 if (list_empty(>t_space->node_list)) 3326 return true; 3327 3328 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3329 if (other->allocated && !other->hole_follows && other->color != cache_level) 3330 return false; 3331 3332 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3333 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3334 return false; 3335 3336 return true; 3337 } 3338 3339 static void i915_gem_verify_gtt(struct drm_device *dev) 3340 { 3341 #if WATCH_GTT 3342 struct drm_i915_private *dev_priv = dev->dev_private; 3343 struct drm_i915_gem_object *obj; 3344 int err = 0; 3345 3346 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { 3347 if (obj->gtt_space == NULL) { 3348 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3349 err++; 3350 continue; 3351 } 3352 3353 if (obj->cache_level != obj->gtt_space->color) { 3354 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3355 obj->gtt_space->start, 3356 obj->gtt_space->start + obj->gtt_space->size, 3357 obj->cache_level, 3358 obj->gtt_space->color); 3359 err++; 3360 continue; 3361 } 3362 3363 if (!i915_gem_valid_gtt_space(dev, 3364 obj->gtt_space, 3365 obj->cache_level)) { 3366 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3367 obj->gtt_space->start, 3368 obj->gtt_space->start + obj->gtt_space->size, 3369 obj->cache_level); 3370 err++; 3371 continue; 3372 } 3373 } 3374 3375 WARN_ON(err); 3376 #endif 3377 } 3378 3379 /** 3380 * Finds free space in the GTT aperture and binds the object there. 
3381 */ 3382 static int 3383 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 3384 unsigned alignment, 3385 bool map_and_fenceable, 3386 bool nonblocking) 3387 { 3388 struct drm_device *dev = obj->base.dev; 3389 drm_i915_private_t *dev_priv = dev->dev_private; 3390 struct drm_mm_node *node; 3391 u32 size, fence_size, fence_alignment, unfenced_alignment; 3392 bool mappable, fenceable; 3393 int ret; 3394 3395 if (obj->madv != I915_MADV_WILLNEED) { 3396 DRM_ERROR("Attempting to bind a purgeable object\n"); 3397 return -EINVAL; 3398 } 3399 3400 fence_size = i915_gem_get_gtt_size(dev, 3401 obj->base.size, 3402 obj->tiling_mode); 3403 fence_alignment = i915_gem_get_gtt_alignment(dev, 3404 obj->base.size, 3405 obj->tiling_mode); 3406 unfenced_alignment = 3407 i915_gem_get_unfenced_gtt_alignment(dev, 3408 obj->base.size, 3409 obj->tiling_mode); 3410 3411 if (alignment == 0) 3412 alignment = map_and_fenceable ? fence_alignment : 3413 unfenced_alignment; 3414 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 3415 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 3416 return -EINVAL; 3417 } 3418 3419 size = map_and_fenceable ? fence_size : obj->base.size; 3420 3421 /* If the object is bigger than the entire aperture, reject it early 3422 * before evicting everything in a vain attempt to find space. 3423 */ 3424 if (obj->base.size > 3425 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 3426 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 3427 return -E2BIG; 3428 } 3429 3430 ret = i915_gem_object_get_pages(obj); 3431 if (ret) 3432 return ret; 3433 3434 i915_gem_object_pin_pages(obj); 3435 3436 node = kzalloc(sizeof(*node), GFP_KERNEL); 3437 if (node == NULL) { 3438 i915_gem_object_unpin_pages(obj); 3439 return -ENOMEM; 3440 } 3441 3442 search_free: 3443 if (map_and_fenceable) 3444 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, 3445 size, alignment, obj->cache_level, 3446 0, dev_priv->mm.gtt_mappable_end); 3447 else 3448 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, 3449 size, alignment, obj->cache_level); 3450 if (ret) { 3451 ret = i915_gem_evict_something(dev, size, alignment, 3452 obj->cache_level, 3453 map_and_fenceable, 3454 nonblocking); 3455 if (ret == 0) 3456 goto search_free; 3457 3458 i915_gem_object_unpin_pages(obj); 3459 kfree(node); 3460 return ret; 3461 } 3462 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { 3463 i915_gem_object_unpin_pages(obj); 3464 drm_mm_put_block(node); 3465 return -EINVAL; 3466 } 3467 3468 ret = i915_gem_gtt_prepare_object(obj); 3469 if (ret) { 3470 i915_gem_object_unpin_pages(obj); 3471 drm_mm_put_block(node); 3472 return ret; 3473 } 3474 3475 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 3476 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3477 3478 obj->gtt_space = node; 3479 obj->gtt_offset = node->start; 3480 3481 fenceable = 3482 node->size == fence_size && 3483 (node->start & (fence_alignment - 1)) == 0; 3484 3485 mappable = 3486 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 3487 3488 obj->map_and_fenceable = mappable && fenceable; 3489 3490 i915_gem_object_unpin_pages(obj); 3491 trace_i915_gem_object_bind(obj, map_and_fenceable); 3492 i915_gem_verify_gtt(dev); 3493 return 0; 3494 } 3495 3496 void 3497 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 3498 { 3499 /* If we don't have a page list set up, then we're not pinned 3500 * to GPU, and we can 
ignore the cache flush because it'll happen 3501 * again at bind time. 3502 */ 3503 if (obj->pages == NULL) 3504 return; 3505 3506 /* If the GPU is snooping the contents of the CPU cache, 3507 * we do not need to manually clear the CPU cache lines. However, 3508 * the caches are only snooped when the render cache is 3509 * flushed/invalidated. As we always have to emit invalidations 3510 * and flushes when moving into and out of the RENDER domain, correct 3511 * snooping behaviour occurs naturally as the result of our domain 3512 * tracking. 3513 */ 3514 if (obj->cache_level != I915_CACHE_NONE) 3515 return; 3516 3517 trace_i915_gem_object_clflush(obj); 3518 3519 #ifdef __NetBSD__ 3520 drm_clflush_pglist(&obj->igo_pageq); 3521 #else 3522 drm_clflush_sg(obj->pages); 3523 #endif 3524 } 3525 3526 /** Flushes the GTT write domain for the object if it's dirty. */ 3527 static void 3528 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3529 { 3530 uint32_t old_write_domain; 3531 3532 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3533 return; 3534 3535 /* No actual flushing is required for the GTT write domain. Writes 3536 * to it immediately go to main memory as far as we know, so there's 3537 * no chipset flush. It also doesn't land in render cache. 3538 * 3539 * However, we do have to enforce the order so that all writes through 3540 * the GTT land before any writes to the device, such as updates to 3541 * the GATT itself. 3542 */ 3543 wmb(); 3544 3545 old_write_domain = obj->base.write_domain; 3546 obj->base.write_domain = 0; 3547 3548 trace_i915_gem_object_change_domain(obj, 3549 obj->base.read_domains, 3550 old_write_domain); 3551 } 3552 3553 /** Flushes the CPU write domain for the object if it's dirty. */ 3554 static void 3555 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3556 { 3557 uint32_t old_write_domain; 3558 3559 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3560 return; 3561 3562 i915_gem_clflush_object(obj); 3563 i915_gem_chipset_flush(obj->base.dev); 3564 old_write_domain = obj->base.write_domain; 3565 obj->base.write_domain = 0; 3566 3567 trace_i915_gem_object_change_domain(obj, 3568 obj->base.read_domains, 3569 old_write_domain); 3570 } 3571 3572 /** 3573 * Moves a single object to the GTT read, and possibly write domain. 3574 * 3575 * This function returns when the move is complete, including waiting on 3576 * flushes to occur. 3577 */ 3578 int 3579 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3580 { 3581 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3582 uint32_t old_write_domain, old_read_domains; 3583 int ret; 3584 3585 /* Not valid to be called on unbound objects. */ 3586 if (obj->gtt_space == NULL) 3587 return -EINVAL; 3588 3589 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3590 return 0; 3591 3592 ret = i915_gem_object_wait_rendering(obj, !write); 3593 if (ret) 3594 return ret; 3595 3596 i915_gem_object_flush_cpu_write_domain(obj); 3597 3598 old_write_domain = obj->base.write_domain; 3599 old_read_domains = obj->base.read_domains; 3600 3601 /* It should now be out of any other write domains, and we can update 3602 * the domain values for our changes. 
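/*
 * Sketch of the domain-tracking invariant applied by the domain-moving
 * functions here (flush/clflush steps elided, illustrative names): an
 * object may be readable in several domains at once, but writable in at
 * most one, and moving it somewhere for writing collapses the read set to
 * that single domain.
 */
struct ex_domains {
	u32 read_domains;
	u32 write_domain;	/* zero or exactly one domain bit */
};

static void
ex_set_domain(struct ex_domains *d, u32 domain, bool write)
{
	d->read_domains |= domain;
	if (write) {
		d->read_domains = domain;
		d->write_domain = domain;
	}
}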
3603 */ 3604 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3605 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3606 if (write) { 3607 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3608 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3609 obj->dirty = 1; 3610 } 3611 3612 trace_i915_gem_object_change_domain(obj, 3613 old_read_domains, 3614 old_write_domain); 3615 3616 /* And bump the LRU for this access */ 3617 if (i915_gem_object_is_inactive(obj)) 3618 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3619 3620 return 0; 3621 } 3622 3623 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3624 enum i915_cache_level cache_level) 3625 { 3626 struct drm_device *dev = obj->base.dev; 3627 drm_i915_private_t *dev_priv = dev->dev_private; 3628 int ret; 3629 3630 if (obj->cache_level == cache_level) 3631 return 0; 3632 3633 if (obj->pin_count) { 3634 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3635 return -EBUSY; 3636 } 3637 3638 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 3639 ret = i915_gem_object_unbind(obj); 3640 if (ret) 3641 return ret; 3642 } 3643 3644 if (obj->gtt_space) { 3645 ret = i915_gem_object_finish_gpu(obj); 3646 if (ret) 3647 return ret; 3648 3649 i915_gem_object_finish_gtt(obj); 3650 3651 /* Before SandyBridge, you could not use tiling or fence 3652 * registers with snooped memory, so relinquish any fences 3653 * currently pointing to our region in the aperture. 3654 */ 3655 if (INTEL_INFO(dev)->gen < 6) { 3656 ret = i915_gem_object_put_fence(obj); 3657 if (ret) 3658 return ret; 3659 } 3660 3661 if (obj->has_global_gtt_mapping) 3662 i915_gem_gtt_bind_object(obj, cache_level); 3663 if (obj->has_aliasing_ppgtt_mapping) 3664 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3665 obj, cache_level); 3666 3667 obj->gtt_space->color = cache_level; 3668 } 3669 3670 if (cache_level == I915_CACHE_NONE) { 3671 u32 old_read_domains, old_write_domain; 3672 3673 /* If we're coming from LLC cached, then we haven't 3674 * actually been tracking whether the data is in the 3675 * CPU cache or not, since we only allow one bit set 3676 * in obj->write_domain and have been skipping the clflushes. 3677 * Just set it to the CPU cache for now. 
3678 */ 3679 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3680 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3681 3682 old_read_domains = obj->base.read_domains; 3683 old_write_domain = obj->base.write_domain; 3684 3685 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3686 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3687 3688 trace_i915_gem_object_change_domain(obj, 3689 old_read_domains, 3690 old_write_domain); 3691 } 3692 3693 obj->cache_level = cache_level; 3694 i915_gem_verify_gtt(dev); 3695 return 0; 3696 } 3697 3698 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3699 struct drm_file *file) 3700 { 3701 struct drm_i915_gem_caching *args = data; 3702 struct drm_i915_gem_object *obj; 3703 int ret; 3704 3705 ret = i915_mutex_lock_interruptible(dev); 3706 if (ret) 3707 return ret; 3708 3709 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3710 if (&obj->base == NULL) { 3711 ret = -ENOENT; 3712 goto unlock; 3713 } 3714 3715 args->caching = obj->cache_level != I915_CACHE_NONE; 3716 3717 drm_gem_object_unreference(&obj->base); 3718 unlock: 3719 mutex_unlock(&dev->struct_mutex); 3720 return ret; 3721 } 3722 3723 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3724 struct drm_file *file) 3725 { 3726 struct drm_i915_gem_caching *args = data; 3727 struct drm_i915_gem_object *obj; 3728 enum i915_cache_level level; 3729 int ret; 3730 3731 switch (args->caching) { 3732 case I915_CACHING_NONE: 3733 level = I915_CACHE_NONE; 3734 break; 3735 case I915_CACHING_CACHED: 3736 level = I915_CACHE_LLC; 3737 break; 3738 default: 3739 return -EINVAL; 3740 } 3741 3742 ret = i915_mutex_lock_interruptible(dev); 3743 if (ret) 3744 return ret; 3745 3746 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3747 if (&obj->base == NULL) { 3748 ret = -ENOENT; 3749 goto unlock; 3750 } 3751 3752 ret = i915_gem_object_set_cache_level(obj, level); 3753 3754 drm_gem_object_unreference(&obj->base); 3755 unlock: 3756 mutex_unlock(&dev->struct_mutex); 3757 return ret; 3758 } 3759 3760 /* 3761 * Prepare buffer for display plane (scanout, cursors, etc). 3762 * Can be called from an uninterruptible phase (modesetting) and allows 3763 * any flushes to be pipelined (for pageflips). 3764 */ 3765 int 3766 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3767 u32 alignment, 3768 struct intel_ring_buffer *pipelined) 3769 { 3770 u32 old_read_domains, old_write_domain; 3771 int ret; 3772 3773 if (pipelined != obj->ring) { 3774 ret = i915_gem_object_sync(obj, pipelined); 3775 if (ret) 3776 return ret; 3777 } 3778 3779 /* The display engine is not coherent with the LLC cache on gen6. As 3780 * a result, we make sure that the pinning that is about to occur is 3781 * done with uncached PTEs. This is lowest common denominator for all 3782 * chipsets. 3783 * 3784 * However for gen6+, we could do better by using the GFDT bit instead 3785 * of uncaching, which would allow us to flush all the LLC-cached data 3786 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3787 */ 3788 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3789 if (ret) 3790 return ret; 3791 3792 /* As the user may map the buffer once pinned in the display plane 3793 * (e.g. libkms for the bootup splash), we have to ensure that we 3794 * always use map_and_fenceable for all scanout buffers. 
3795 */ 3796 ret = i915_gem_object_pin(obj, alignment, true, false); 3797 if (ret) 3798 return ret; 3799 3800 i915_gem_object_flush_cpu_write_domain(obj); 3801 3802 old_write_domain = obj->base.write_domain; 3803 old_read_domains = obj->base.read_domains; 3804 3805 /* It should now be out of any other write domains, and we can update 3806 * the domain values for our changes. 3807 */ 3808 obj->base.write_domain = 0; 3809 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3810 3811 trace_i915_gem_object_change_domain(obj, 3812 old_read_domains, 3813 old_write_domain); 3814 3815 return 0; 3816 } 3817 3818 int 3819 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3820 { 3821 int ret; 3822 3823 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3824 return 0; 3825 3826 ret = i915_gem_object_wait_rendering(obj, false); 3827 if (ret) 3828 return ret; 3829 3830 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3831 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3832 return 0; 3833 } 3834 3835 /** 3836 * Moves a single object to the CPU read, and possibly write domain. 3837 * 3838 * This function returns when the move is complete, including waiting on 3839 * flushes to occur. 3840 */ 3841 int 3842 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3843 { 3844 uint32_t old_write_domain, old_read_domains; 3845 int ret; 3846 3847 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3848 return 0; 3849 3850 ret = i915_gem_object_wait_rendering(obj, !write); 3851 if (ret) 3852 return ret; 3853 3854 i915_gem_object_flush_gtt_write_domain(obj); 3855 3856 old_write_domain = obj->base.write_domain; 3857 old_read_domains = obj->base.read_domains; 3858 3859 /* Flush the CPU cache if it's still invalid. */ 3860 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3861 i915_gem_clflush_object(obj); 3862 3863 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3864 } 3865 3866 /* It should now be out of any other write domains, and we can update 3867 * the domain values for our changes. 3868 */ 3869 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3870 3871 /* If we're writing through the CPU, then the GPU read domains will 3872 * need to be invalidated at next use. 3873 */ 3874 if (write) { 3875 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3876 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3877 } 3878 3879 trace_i915_gem_object_change_domain(obj, 3880 old_read_domains, 3881 old_write_domain); 3882 3883 return 0; 3884 } 3885 3886 /* Throttle our rendering by waiting until the ring has completed our requests 3887 * emitted over 20 msec ago. 3888 * 3889 * Note that if we were to use the current jiffies each time around the loop, 3890 * we wouldn't escape the function with any frames outstanding if the time to 3891 * render a frame was over 20ms. 3892 * 3893 * This should get us reasonable parallelism between CPU and GPU but also 3894 * relatively low latency when blocking on a particular request to finish. 
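/*
 * Sketch of the throttle policy described above (plain counters instead of
 * jiffies, illustrative names): walk the client's requests from oldest to
 * newest and pick the newest one emitted before the 20 ms cutoff; waiting
 * on that request leaves roughly one window of work in flight.
 */
struct ex_request {
	u32 seqno;
	unsigned long emitted;	/* timestamp when the request was queued */
};

static u32
ex_throttle_target(const struct ex_request *reqs, int nreqs,
		   unsigned long now, unsigned long window)
{
	unsigned long cutoff = now - window;
	u32 target = 0;
	int i;

	for (i = 0; i < nreqs; i++) {	/* oldest first */
		if (reqs[i].emitted >= cutoff)
			break;		/* too recent, stop scanning */
		target = reqs[i].seqno;
	}
	return target;			/* 0: nothing old enough to wait for */
}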
3895 */ 3896 static int 3897 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3898 { 3899 struct drm_i915_private *dev_priv = dev->dev_private; 3900 struct drm_i915_file_private *file_priv = file->driver_priv; 3901 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3902 struct drm_i915_gem_request *request; 3903 struct intel_ring_buffer *ring = NULL; 3904 u32 seqno = 0; 3905 int ret; 3906 3907 if (atomic_read(&dev_priv->mm.wedged)) 3908 return -EIO; 3909 3910 spin_lock(&file_priv->mm.lock); 3911 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3912 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3913 break; 3914 3915 ring = request->ring; 3916 seqno = request->seqno; 3917 } 3918 spin_unlock(&file_priv->mm.lock); 3919 3920 if (seqno == 0) 3921 return 0; 3922 3923 ret = __wait_seqno(ring, seqno, true, NULL); 3924 if (ret == 0) 3925 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3926 3927 return ret; 3928 } 3929 3930 int 3931 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3932 uint32_t alignment, 3933 bool map_and_fenceable, 3934 bool nonblocking) 3935 { 3936 int ret; 3937 3938 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3939 return -EBUSY; 3940 3941 if (obj->gtt_space != NULL) { 3942 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3943 (map_and_fenceable && !obj->map_and_fenceable)) { 3944 WARN(obj->pin_count, 3945 "bo is already pinned with incorrect alignment:" 3946 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3947 " obj->map_and_fenceable=%d\n", 3948 obj->gtt_offset, alignment, 3949 map_and_fenceable, 3950 obj->map_and_fenceable); 3951 ret = i915_gem_object_unbind(obj); 3952 if (ret) 3953 return ret; 3954 } 3955 } 3956 3957 if (obj->gtt_space == NULL) { 3958 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3959 3960 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3961 map_and_fenceable, 3962 nonblocking); 3963 if (ret) 3964 return ret; 3965 3966 if (!dev_priv->mm.aliasing_ppgtt) 3967 i915_gem_gtt_bind_object(obj, obj->cache_level); 3968 } 3969 3970 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3971 i915_gem_gtt_bind_object(obj, obj->cache_level); 3972 3973 obj->pin_count++; 3974 obj->pin_mappable |= map_and_fenceable; 3975 3976 return 0; 3977 } 3978 3979 void 3980 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3981 { 3982 BUG_ON(obj->pin_count == 0); 3983 BUG_ON(obj->gtt_space == NULL); 3984 3985 if (--obj->pin_count == 0) 3986 obj->pin_mappable = false; 3987 } 3988 3989 int 3990 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3991 struct drm_file *file) 3992 { 3993 struct drm_i915_gem_pin *args = data; 3994 struct drm_i915_gem_object *obj; 3995 int ret; 3996 3997 ret = i915_mutex_lock_interruptible(dev); 3998 if (ret) 3999 return ret; 4000 4001 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4002 if (&obj->base == NULL) { 4003 ret = -ENOENT; 4004 goto unlock; 4005 } 4006 4007 if (obj->madv != I915_MADV_WILLNEED) { 4008 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 4009 ret = -EINVAL; 4010 goto out; 4011 } 4012 4013 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4014 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4015 args->handle); 4016 ret = -EINVAL; 4017 goto out; 4018 } 4019 4020 if (obj->user_pin_count == 0) { 4021 ret = i915_gem_object_pin(obj, args->alignment, true, false); 4022 if (ret) 4023 goto out; 4024 } 4025 4026 obj->user_pin_count++; 4027 obj->pin_filp = 
file; 4028 4029 /* XXX - flush the CPU caches for pinned objects 4030 * as the X server doesn't manage domains yet 4031 */ 4032 i915_gem_object_flush_cpu_write_domain(obj); 4033 args->offset = obj->gtt_offset; 4034 out: 4035 drm_gem_object_unreference(&obj->base); 4036 unlock: 4037 mutex_unlock(&dev->struct_mutex); 4038 return ret; 4039 } 4040 4041 int 4042 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4043 struct drm_file *file) 4044 { 4045 struct drm_i915_gem_pin *args = data; 4046 struct drm_i915_gem_object *obj; 4047 int ret; 4048 4049 ret = i915_mutex_lock_interruptible(dev); 4050 if (ret) 4051 return ret; 4052 4053 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4054 if (&obj->base == NULL) { 4055 ret = -ENOENT; 4056 goto unlock; 4057 } 4058 4059 if (obj->pin_filp != file) { 4060 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4061 args->handle); 4062 ret = -EINVAL; 4063 goto out; 4064 } 4065 obj->user_pin_count--; 4066 if (obj->user_pin_count == 0) { 4067 obj->pin_filp = NULL; 4068 i915_gem_object_unpin(obj); 4069 } 4070 4071 out: 4072 drm_gem_object_unreference(&obj->base); 4073 unlock: 4074 mutex_unlock(&dev->struct_mutex); 4075 return ret; 4076 } 4077 4078 int 4079 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4080 struct drm_file *file) 4081 { 4082 struct drm_i915_gem_busy *args = data; 4083 struct drm_i915_gem_object *obj; 4084 int ret; 4085 4086 ret = i915_mutex_lock_interruptible(dev); 4087 if (ret) 4088 return ret; 4089 4090 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4091 if (&obj->base == NULL) { 4092 ret = -ENOENT; 4093 goto unlock; 4094 } 4095 4096 /* Count all active objects as busy, even if they are currently not used 4097 * by the gpu. Users of this interface expect objects to eventually 4098 * become non-busy without any further actions, therefore emit any 4099 * necessary flushes here.
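	 *
	 * The result is reported with obj->active in the low bits and, if the
	 * object is on a ring, intel_ring_flag(obj->ring) in the top 16 bits.
	 * A userspace caller might poll it roughly like this (illustrative
	 * sketch only, using libdrm's drmIoctl(); reuse_buffer() is a
	 * hypothetical helper):
	 *
	 *	struct drm_i915_gem_busy busy = { .handle = handle };
	 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) == 0 &&
	 *	    busy.busy == 0)
	 *		reuse_buffer(handle);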
4100 */ 4101 ret = i915_gem_object_flush_active(obj); 4102 4103 args->busy = obj->active; 4104 if (obj->ring) { 4105 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4106 args->busy |= intel_ring_flag(obj->ring) << 16; 4107 } 4108 4109 drm_gem_object_unreference(&obj->base); 4110 unlock: 4111 mutex_unlock(&dev->struct_mutex); 4112 return ret; 4113 } 4114 4115 int 4116 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4117 struct drm_file *file_priv) 4118 { 4119 return i915_gem_ring_throttle(dev, file_priv); 4120 } 4121 4122 int 4123 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4124 struct drm_file *file_priv) 4125 { 4126 struct drm_i915_gem_madvise *args = data; 4127 struct drm_i915_gem_object *obj; 4128 int ret; 4129 4130 switch (args->madv) { 4131 case I915_MADV_DONTNEED: 4132 case I915_MADV_WILLNEED: 4133 break; 4134 default: 4135 return -EINVAL; 4136 } 4137 4138 ret = i915_mutex_lock_interruptible(dev); 4139 if (ret) 4140 return ret; 4141 4142 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4143 if (&obj->base == NULL) { 4144 ret = -ENOENT; 4145 goto unlock; 4146 } 4147 4148 if (obj->pin_count) { 4149 ret = -EINVAL; 4150 goto out; 4151 } 4152 4153 if (obj->madv != __I915_MADV_PURGED) 4154 obj->madv = args->madv; 4155 4156 /* if the object is no longer attached, discard its backing storage */ 4157 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4158 i915_gem_object_truncate(obj); 4159 4160 args->retained = obj->madv != __I915_MADV_PURGED; 4161 4162 out: 4163 drm_gem_object_unreference(&obj->base); 4164 unlock: 4165 mutex_unlock(&dev->struct_mutex); 4166 return ret; 4167 } 4168 4169 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4170 const struct drm_i915_gem_object_ops *ops) 4171 { 4172 INIT_LIST_HEAD(&obj->mm_list); 4173 INIT_LIST_HEAD(&obj->gtt_list); 4174 INIT_LIST_HEAD(&obj->ring_list); 4175 INIT_LIST_HEAD(&obj->exec_list); 4176 4177 obj->ops = ops; 4178 4179 obj->fence_reg = I915_FENCE_REG_NONE; 4180 obj->madv = I915_MADV_WILLNEED; 4181 /* Avoid an unnecessary call to unbind on the first bind. */ 4182 obj->map_and_fenceable = true; 4183 4184 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4185 } 4186 4187 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4188 .get_pages = i915_gem_object_get_pages_gtt, 4189 .put_pages = i915_gem_object_put_pages_gtt, 4190 }; 4191 4192 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4193 size_t size) 4194 { 4195 struct drm_i915_gem_object *obj; 4196 #ifndef __NetBSD__ /* XXX >32bit dma? */ 4197 struct address_space *mapping; 4198 u32 mask; 4199 #endif 4200 4201 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 4202 if (obj == NULL) 4203 return NULL; 4204 4205 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4206 kfree(obj); 4207 return NULL; 4208 } 4209 4210 #ifndef __NetBSD__ /* XXX >32bit dma? */ 4211 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4212 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4213 /* 965gm cannot relocate objects above 4GiB. 
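		 * Keep the shmem backing pages in the low 4GiB by dropping
		 * __GFP_HIGHMEM and forcing __GFP_DMA32 in the mapping's gfp
		 * mask below.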
*/ 4214 mask &= ~__GFP_HIGHMEM; 4215 mask |= __GFP_DMA32; 4216 } 4217 4218 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4219 mapping_set_gfp_mask(mapping, mask); 4220 #endif 4221 4222 i915_gem_object_init(obj, &i915_gem_object_ops); 4223 4224 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4225 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4226 4227 if (HAS_LLC(dev)) { 4228 /* On some devices, we can have the GPU use the LLC (the CPU 4229 * cache) for about a 10% performance improvement 4230 * compared to uncached. Graphics requests other than 4231 * display scanout are coherent with the CPU in 4232 * accessing this cache. This means in this mode we 4233 * don't need to clflush on the CPU side, and on the 4234 * GPU side we only need to flush internal caches to 4235 * get data visible to the CPU. 4236 * 4237 * However, we maintain the display planes as UC, and so 4238 * need to rebind when first used as such. 4239 */ 4240 obj->cache_level = I915_CACHE_LLC; 4241 } else 4242 obj->cache_level = I915_CACHE_NONE; 4243 4244 return obj; 4245 } 4246 4247 int i915_gem_init_object(struct drm_gem_object *obj) 4248 { 4249 BUG(); 4250 4251 return 0; 4252 } 4253 4254 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4255 { 4256 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4257 struct drm_device *dev = obj->base.dev; 4258 drm_i915_private_t *dev_priv = dev->dev_private; 4259 4260 trace_i915_gem_object_destroy(obj); 4261 4262 if (obj->phys_obj) 4263 i915_gem_detach_phys_object(dev, obj); 4264 4265 obj->pin_count = 0; 4266 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 4267 bool was_interruptible; 4268 4269 was_interruptible = dev_priv->mm.interruptible; 4270 dev_priv->mm.interruptible = false; 4271 4272 WARN_ON(i915_gem_object_unbind(obj)); 4273 4274 dev_priv->mm.interruptible = was_interruptible; 4275 } 4276 4277 obj->pages_pin_count = 0; 4278 i915_gem_object_put_pages(obj); 4279 i915_gem_object_free_mmap_offset(obj); 4280 4281 BUG_ON(obj->pages); 4282 4283 #ifndef __NetBSD__ /* XXX drm prime */ 4284 if (obj->base.import_attach) 4285 drm_prime_gem_destroy(&obj->base, NULL); 4286 #endif 4287 4288 drm_gem_object_release(&obj->base); 4289 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4290 4291 kfree(obj->bit_17); 4292 kfree(obj); 4293 } 4294 4295 int 4296 i915_gem_idle(struct drm_device *dev) 4297 { 4298 drm_i915_private_t *dev_priv = dev->dev_private; 4299 int ret; 4300 4301 mutex_lock(&dev->struct_mutex); 4302 4303 if (dev_priv->mm.suspended) { 4304 mutex_unlock(&dev->struct_mutex); 4305 return 0; 4306 } 4307 4308 ret = i915_gpu_idle(dev); 4309 if (ret) { 4310 mutex_unlock(&dev->struct_mutex); 4311 return ret; 4312 } 4313 i915_gem_retire_requests(dev); 4314 4315 /* Under UMS, be paranoid and evict. */ 4316 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4317 i915_gem_evict_everything(dev); 4318 4319 i915_gem_reset_fences(dev); 4320 4321 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4322 * We need to replace this with a semaphore, or something. 4323 * And not confound mm.suspended! 4324 */ 4325 dev_priv->mm.suspended = 1; 4326 del_timer_sync(&dev_priv->hangcheck_timer); 4327 4328 i915_kernel_lost_context(dev); 4329 i915_gem_cleanup_ringbuffer(dev); 4330 4331 mutex_unlock(&dev->struct_mutex); 4332 4333 /* Cancel the retire work handler, which should be idle now. 
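	 * cancel_delayed_work_sync() both cancels a pending run and waits for
	 * a handler that has already started, so the retire work is guaranteed
	 * to be finished once this returns.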
*/ 4334 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4335 4336 return 0; 4337 } 4338 4339 void i915_gem_l3_remap(struct drm_device *dev) 4340 { 4341 drm_i915_private_t *dev_priv = dev->dev_private; 4342 u32 misccpctl; 4343 int i; 4344 4345 if (!IS_IVYBRIDGE(dev)) 4346 return; 4347 4348 if (!dev_priv->l3_parity.remap_info) 4349 return; 4350 4351 misccpctl = I915_READ(GEN7_MISCCPCTL); 4352 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 4353 POSTING_READ(GEN7_MISCCPCTL); 4354 4355 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4356 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 4357 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 4358 DRM_DEBUG("0x%x was already programmed to %x\n", 4359 GEN7_L3LOG_BASE + i, remap); 4360 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 4361 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 4362 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 4363 } 4364 4365 /* Make sure all the writes land before disabling dop clock gating */ 4366 POSTING_READ(GEN7_L3LOG_BASE); 4367 4368 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 4369 } 4370 4371 void i915_gem_init_swizzling(struct drm_device *dev) 4372 { 4373 drm_i915_private_t *dev_priv = dev->dev_private; 4374 4375 if (INTEL_INFO(dev)->gen < 5 || 4376 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4377 return; 4378 4379 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4380 DISP_TILE_SURFACE_SWIZZLING); 4381 4382 if (IS_GEN5(dev)) 4383 return; 4384 4385 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4386 if (IS_GEN6(dev)) 4387 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4388 else 4389 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4390 } 4391 4392 static bool 4393 intel_enable_blt(struct drm_device *dev) 4394 { 4395 if (!HAS_BLT(dev)) 4396 return false; 4397 4398 /* The blitter was dysfunctional on early prototypes */ 4399 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4400 DRM_INFO("BLT not supported on this pre-production hardware;" 4401 " graphics performance will be degraded.\n"); 4402 return false; 4403 } 4404 4405 return true; 4406 } 4407 4408 int 4409 i915_gem_init_hw(struct drm_device *dev) 4410 { 4411 drm_i915_private_t *dev_priv = dev->dev_private; 4412 int ret; 4413 4414 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4415 return -EIO; 4416 4417 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 4418 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 4419 4420 i915_gem_l3_remap(dev); 4421 4422 i915_gem_init_swizzling(dev); 4423 4424 ret = intel_init_render_ring_buffer(dev); 4425 if (ret) 4426 return ret; 4427 4428 if (HAS_BSD(dev)) { 4429 ret = intel_init_bsd_ring_buffer(dev); 4430 if (ret) 4431 goto cleanup_render_ring; 4432 } 4433 4434 if (intel_enable_blt(dev)) { 4435 ret = intel_init_blt_ring_buffer(dev); 4436 if (ret) 4437 goto cleanup_bsd_ring; 4438 } 4439 4440 dev_priv->next_seqno = 1; 4441 4442 /* 4443 * XXX: There was some w/a described somewhere suggesting loading 4444 * contexts before PPGTT. 
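	 * ("w/a" is shorthand for workaround.)  Hence the context setup runs
	 * before i915_gem_init_ppgtt() below.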
4445 */ 4446 i915_gem_context_init(dev); 4447 i915_gem_init_ppgtt(dev); 4448 4449 return 0; 4450 4451 cleanup_bsd_ring: 4452 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4453 cleanup_render_ring: 4454 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4455 return ret; 4456 } 4457 4458 static bool 4459 intel_enable_ppgtt(struct drm_device *dev) 4460 { 4461 #ifdef __NetBSD__ /* XXX ppgtt */ 4462 return false; 4463 #else 4464 if (i915_enable_ppgtt >= 0) 4465 return i915_enable_ppgtt; 4466 4467 #ifdef CONFIG_INTEL_IOMMU 4468 /* Disable ppgtt on SNB if VT-d is on. */ 4469 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) 4470 return false; 4471 #endif 4472 4473 return true; 4474 #endif 4475 } 4476 4477 int i915_gem_init(struct drm_device *dev) 4478 { 4479 struct drm_i915_private *dev_priv = dev->dev_private; 4480 unsigned long gtt_size, mappable_size; 4481 int ret; 4482 4483 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 4484 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 4485 4486 mutex_lock(&dev->struct_mutex); 4487 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 4488 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 4489 * aperture accordingly when using aliasing ppgtt. */ 4490 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 4491 4492 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); 4493 4494 ret = i915_gem_init_aliasing_ppgtt(dev); 4495 if (ret) { 4496 i915_gem_fini_global_gtt(dev); 4497 mutex_unlock(&dev->struct_mutex); 4498 return ret; 4499 } 4500 } else { 4501 /* Let GEM Manage all of the aperture. 4502 * 4503 * However, leave one page at the end still bound to the scratch 4504 * page. There are a number of places where the hardware 4505 * apparently prefetches past the end of the object, and we've 4506 * seen multiple hangs with the GPU head pointer stuck in a 4507 * batchbuffer bound at the last page of the aperture. One page 4508 * should be enough to keep any prefetching inside of the 4509 * aperture. 4510 */ 4511 i915_gem_init_global_gtt(dev, 0, mappable_size, 4512 gtt_size); 4513 } 4514 4515 ret = i915_gem_init_hw(dev); 4516 #ifdef __NetBSD__ /* XXX fini global gtt */ 4517 if (ret) 4518 i915_gem_fini_global_gtt(dev); 4519 #endif 4520 mutex_unlock(&dev->struct_mutex); 4521 if (ret) { 4522 i915_gem_cleanup_aliasing_ppgtt(dev); 4523 return ret; 4524 } 4525 4526 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
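	 * (With DRIVER_MODESET the flag is simply left at its zeroed default,
	 * so the legacy DRI1 batchbuffer path stays disabled.)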
*/ 4527 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4528 dev_priv->dri1.allow_batchbuffer = 1; 4529 return 0; 4530 } 4531 4532 void 4533 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4534 { 4535 drm_i915_private_t *dev_priv = dev->dev_private; 4536 struct intel_ring_buffer *ring; 4537 int i; 4538 4539 for_each_ring(ring, dev_priv, i) 4540 intel_cleanup_ring_buffer(ring); 4541 } 4542 4543 int 4544 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4545 struct drm_file *file_priv) 4546 { 4547 drm_i915_private_t *dev_priv = dev->dev_private; 4548 int ret; 4549 4550 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4551 return 0; 4552 4553 if (atomic_read(&dev_priv->mm.wedged)) { 4554 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4555 atomic_set(&dev_priv->mm.wedged, 0); 4556 } 4557 4558 mutex_lock(&dev->struct_mutex); 4559 dev_priv->mm.suspended = 0; 4560 4561 ret = i915_gem_init_hw(dev); 4562 if (ret != 0) { 4563 mutex_unlock(&dev->struct_mutex); 4564 return ret; 4565 } 4566 4567 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 4568 mutex_unlock(&dev->struct_mutex); 4569 4570 ret = drm_irq_install(dev); 4571 if (ret) 4572 goto cleanup_ringbuffer; 4573 4574 return 0; 4575 4576 cleanup_ringbuffer: 4577 mutex_lock(&dev->struct_mutex); 4578 i915_gem_cleanup_ringbuffer(dev); 4579 dev_priv->mm.suspended = 1; 4580 mutex_unlock(&dev->struct_mutex); 4581 4582 return ret; 4583 } 4584 4585 int 4586 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4587 struct drm_file *file_priv) 4588 { 4589 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4590 return 0; 4591 4592 drm_irq_uninstall(dev); 4593 return i915_gem_idle(dev); 4594 } 4595 4596 void 4597 i915_gem_lastclose(struct drm_device *dev) 4598 { 4599 int ret; 4600 4601 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4602 return; 4603 4604 ret = i915_gem_idle(dev); 4605 if (ret) 4606 DRM_ERROR("failed to idle hardware: %d\n", ret); 4607 } 4608 4609 static void 4610 init_ring_lists(struct intel_ring_buffer *ring) 4611 { 4612 INIT_LIST_HEAD(&ring->active_list); 4613 INIT_LIST_HEAD(&ring->request_list); 4614 } 4615 4616 void 4617 i915_gem_load(struct drm_device *dev) 4618 { 4619 int i; 4620 drm_i915_private_t *dev_priv = dev->dev_private; 4621 4622 INIT_LIST_HEAD(&dev_priv->mm.active_list); 4623 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4624 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4625 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4626 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4627 for (i = 0; i < I915_NUM_RINGS; i++) 4628 init_ring_lists(&dev_priv->ring[i]); 4629 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4630 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4631 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4632 i915_gem_retire_work_handler); 4633 init_completion(&dev_priv->error_completion); 4634 4635 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 4636 if (IS_GEN3(dev)) { 4637 I915_WRITE(MI_ARB_STATE, 4638 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 4639 } 4640 4641 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4642 4643 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4644 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4645 dev_priv->fence_reg_start = 3; 4646 4647 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4648 dev_priv->num_fence_regs = 16; 4649 else 4650 dev_priv->num_fence_regs = 8; 4651 4652 /* Initialize fence registers to zero */ 4653 i915_gem_reset_fences(dev); 4654 4655 i915_gem_detect_bit_6_swizzle(dev); 
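	/*
	 * Wait queue on which callers sleep until pending page flips complete;
	 * the NetBSD port pairs it with an explicit spin lock.
	 */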
4656 #ifdef __NetBSD__ 4657 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip"); 4658 spin_lock_init(&dev_priv->pending_flip_lock); 4659 #else 4660 init_waitqueue_head(&dev_priv->pending_flip_queue); 4661 #endif 4662 4663 dev_priv->mm.interruptible = true; 4664 4665 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4666 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4667 register_shrinker(&dev_priv->mm.inactive_shrinker); 4668 } 4669 4670 /* 4671 * Create a physically contiguous memory object for this object 4672 * e.g. for cursor + overlay regs 4673 */ 4674 static int i915_gem_init_phys_object(struct drm_device *dev, 4675 int id, int size, int align) 4676 { 4677 drm_i915_private_t *dev_priv = dev->dev_private; 4678 struct drm_i915_gem_phys_object *phys_obj; 4679 int ret; 4680 4681 if (dev_priv->mm.phys_objs[id - 1] || !size) 4682 return 0; 4683 4684 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4685 if (!phys_obj) 4686 return -ENOMEM; 4687 4688 phys_obj->id = id; 4689 4690 phys_obj->handle = drm_pci_alloc(dev, size, align); 4691 if (!phys_obj->handle) { 4692 ret = -ENOMEM; 4693 goto kfree_obj; 4694 } 4695 #ifndef __NetBSD__ /* XXX x86 wc? */ 4696 #ifdef CONFIG_X86 4697 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4698 #endif 4699 #endif 4700 4701 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4702 4703 return 0; 4704 kfree_obj: 4705 kfree(phys_obj); 4706 return ret; 4707 } 4708 4709 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4710 { 4711 drm_i915_private_t *dev_priv = dev->dev_private; 4712 struct drm_i915_gem_phys_object *phys_obj; 4713 4714 if (!dev_priv->mm.phys_objs[id - 1]) 4715 return; 4716 4717 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4718 if (phys_obj->cur_obj) { 4719 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4720 } 4721 4722 #ifndef __NetBSD__ /* XXX x86 wb? */ 4723 #ifdef CONFIG_X86 4724 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4725 #endif 4726 #endif 4727 drm_pci_free(dev, phys_obj->handle); 4728 kfree(phys_obj); 4729 dev_priv->mm.phys_objs[id - 1] = NULL; 4730 } 4731 4732 void i915_gem_free_all_phys_object(struct drm_device *dev) 4733 { 4734 int i; 4735 4736 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4737 i915_gem_free_phys_object(dev, i); 4738 } 4739 4740 void i915_gem_detach_phys_object(struct drm_device *dev, 4741 struct drm_i915_gem_object *obj) 4742 { 4743 #ifndef __NetBSD__ 4744 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4745 #endif 4746 char *vaddr; 4747 int i; 4748 int page_count; 4749 4750 if (!obj->phys_obj) 4751 return; 4752 vaddr = obj->phys_obj->handle->vaddr; 4753 4754 page_count = obj->base.size / PAGE_SIZE; 4755 for (i = 0; i < page_count; i++) { 4756 #ifdef __NetBSD__ 4757 /* XXX Just use ubc_uiomove? 
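		 *
		 * For now, each page of the uvm object is wired, the page's
		 * contents are copied back out of the phys object's contiguous
		 * buffer, the cache lines are flushed and the page marked
		 * dirty, and then the page is unwired again.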
*/ 4758 struct pglist pages; 4759 int error; 4760 4761 TAILQ_INIT(&pages); 4762 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4763 (i+1)*PAGE_SIZE, &pages); 4764 if (error) { 4765 printf("unable to map page %d of i915 gem obj: %d\n", 4766 i, error); 4767 continue; 4768 } 4769 4770 KASSERT(!TAILQ_EMPTY(&pages)); 4771 struct vm_page *const page = TAILQ_FIRST(&pages); 4772 TAILQ_REMOVE(&pages, page, pageq.queue); 4773 KASSERT(TAILQ_EMPTY(&pages)); 4774 4775 char *const dst = kmap_atomic(container_of(page, struct page, 4776 p_vmp)); 4777 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE); 4778 kunmap_atomic(dst); 4779 4780 drm_clflush_page(container_of(page, struct page, p_vmp)); 4781 page->flags &= ~PG_CLEAN; 4782 /* XXX mark page accessed */ 4783 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4784 (i+1)*PAGE_SIZE); 4785 #else 4786 struct page *page = shmem_read_mapping_page(mapping, i); 4787 if (!IS_ERR(page)) { 4788 char *dst = kmap_atomic(page); 4789 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4790 kunmap_atomic(dst); 4791 4792 drm_clflush_pages(&page, 1); 4793 4794 set_page_dirty(page); 4795 mark_page_accessed(page); 4796 page_cache_release(page); 4797 } 4798 #endif 4799 } 4800 i915_gem_chipset_flush(dev); 4801 4802 obj->phys_obj->cur_obj = NULL; 4803 obj->phys_obj = NULL; 4804 } 4805 4806 int 4807 i915_gem_attach_phys_object(struct drm_device *dev, 4808 struct drm_i915_gem_object *obj, 4809 int id, 4810 int align) 4811 { 4812 #ifndef __NetBSD__ 4813 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4814 #endif 4815 drm_i915_private_t *dev_priv = dev->dev_private; 4816 int ret = 0; 4817 int page_count; 4818 int i; 4819 4820 if (id > I915_MAX_PHYS_OBJECT) 4821 return -EINVAL; 4822 4823 if (obj->phys_obj) { 4824 if (obj->phys_obj->id == id) 4825 return 0; 4826 i915_gem_detach_phys_object(dev, obj); 4827 } 4828 4829 /* create a new object */ 4830 if (!dev_priv->mm.phys_objs[id - 1]) { 4831 ret = i915_gem_init_phys_object(dev, id, 4832 obj->base.size, align); 4833 if (ret) { 4834 DRM_ERROR("failed to init phys object %d size: %zu\n", 4835 id, obj->base.size); 4836 return ret; 4837 } 4838 } 4839 4840 /* bind to the object */ 4841 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4842 obj->phys_obj->cur_obj = obj; 4843 4844 page_count = obj->base.size / PAGE_SIZE; 4845 4846 for (i = 0; i < page_count; i++) { 4847 #ifdef __NetBSD__ 4848 char *const vaddr = obj->phys_obj->handle->vaddr; 4849 struct pglist pages; 4850 int error; 4851 4852 TAILQ_INIT(&pages); 4853 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4854 (i+1)*PAGE_SIZE, &pages); 4855 if (error) 4856 /* XXX errno NetBSD->Linux */ 4857 return -error; 4858 4859 KASSERT(!TAILQ_EMPTY(&pages)); 4860 struct vm_page *const page = TAILQ_FIRST(&pages); 4861 TAILQ_REMOVE(&pages, page, pageq.queue); 4862 KASSERT(TAILQ_EMPTY(&pages)); 4863 4864 char *const src = kmap_atomic(container_of(page, struct page, 4865 p_vmp)); 4866 (void)memcpy(vaddr + (i*PAGE_SIZE), src, PAGE_SIZE); 4867 kunmap_atomic(src); 4868 4869 /* XXX mark page accessed */ 4870 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4871 (i+1)*PAGE_SIZE); 4872 #else 4873 struct page *page; 4874 char *dst, *src; 4875 4876 page = shmem_read_mapping_page(mapping, i); 4877 if (IS_ERR(page)) 4878 return PTR_ERR(page); 4879 4880 src = kmap_atomic(page); 4881 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4882 memcpy(dst, src, PAGE_SIZE); 4883 kunmap_atomic(src); 4884 4885 mark_page_accessed(page); 4886 
page_cache_release(page); 4887 #endif 4888 } 4889 4890 return 0; 4891 } 4892 4893 static int 4894 i915_gem_phys_pwrite(struct drm_device *dev, 4895 struct drm_i915_gem_object *obj, 4896 struct drm_i915_gem_pwrite *args, 4897 struct drm_file *file_priv) 4898 { 4899 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 4900 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4901 4902 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4903 unsigned long unwritten; 4904 4905 /* The physical object once assigned is fixed for the lifetime 4906 * of the obj, so we can safely drop the lock and continue 4907 * to access vaddr. 4908 */ 4909 mutex_unlock(&dev->struct_mutex); 4910 unwritten = copy_from_user(vaddr, user_data, args->size); 4911 mutex_lock(&dev->struct_mutex); 4912 if (unwritten) 4913 return -EFAULT; 4914 } 4915 4916 i915_gem_chipset_flush(dev); 4917 return 0; 4918 } 4919 4920 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4921 { 4922 struct drm_i915_file_private *file_priv = file->driver_priv; 4923 4924 /* Clean up our request list when the client is going away, so that 4925 * later retire_requests won't dereference our soon-to-be-gone 4926 * file_priv. 4927 */ 4928 spin_lock(&file_priv->mm.lock); 4929 while (!list_empty(&file_priv->mm.request_list)) { 4930 struct drm_i915_gem_request *request; 4931 4932 request = list_first_entry(&file_priv->mm.request_list, 4933 struct drm_i915_gem_request, 4934 client_list); 4935 list_del(&request->client_list); 4936 request->file_priv = NULL; 4937 } 4938 spin_unlock(&file_priv->mm.lock); 4939 } 4940 4941 #ifndef __NetBSD__ /* XXX */ 4942 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 4943 { 4944 if (!mutex_is_locked(mutex)) 4945 return false; 4946 4947 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 4948 return mutex->owner == task; 4949 #else 4950 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 4951 return false; 4952 #endif 4953 } 4954 #endif 4955 4956 static int 4957 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4958 { 4959 #ifdef __NetBSD__ /* XXX shrinkers */ 4960 return 0; 4961 #else 4962 struct drm_i915_private *dev_priv = 4963 container_of(shrinker, 4964 struct drm_i915_private, 4965 mm.inactive_shrinker); 4966 struct drm_device *dev = dev_priv->dev; 4967 struct drm_i915_gem_object *obj; 4968 int nr_to_scan = sc->nr_to_scan; 4969 bool unlock = true; 4970 int cnt; 4971 4972 if (!mutex_trylock(&dev->struct_mutex)) { 4973 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 4974 return 0; 4975 4976 if (dev_priv->mm.shrinker_no_lock_stealing) 4977 return 0; 4978 4979 unlock = false; 4980 } 4981 4982 if (nr_to_scan) { 4983 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); 4984 if (nr_to_scan > 0) 4985 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, 4986 false); 4987 if (nr_to_scan > 0) 4988 i915_gem_shrink_all(dev_priv); 4989 } 4990 4991 cnt = 0; 4992 list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list) 4993 if (obj->pages_pin_count == 0) 4994 cnt += obj->base.size >> PAGE_SHIFT; 4995 list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list) 4996 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 4997 cnt += obj->base.size >> PAGE_SHIFT; 4998 4999 if (unlock) 5000 mutex_unlock(&dev->struct_mutex); 5001 return cnt; 5002 #endif 5003 } 5004
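
/*
 * Note on i915_gem_inactive_shrink() above: the value returned to the VM is a
 * page count of what could be reclaimed (unbound objects whose pages are not
 * pinned, plus unpinned inactive objects).  When asked to scan, purgeable
 * objects are dropped first via i915_gem_purge(), __i915_gem_shrink() reclaims
 * further, and i915_gem_shrink_all() is the last resort.
 */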