1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 */ 27 28 #ifdef __NetBSD__ 29 #if 0 /* XXX uvmhist option? */ 30 #include "opt_uvmhist.h" 31 #endif 32 33 #include <sys/types.h> 34 #include <sys/param.h> 35 36 #include <uvm/uvm.h> 37 #include <uvm/uvm_extern.h> 38 #include <uvm/uvm_fault.h> 39 #include <uvm/uvm_page.h> 40 #include <uvm/uvm_pmap.h> 41 #include <uvm/uvm_prot.h> 42 #endif 43 44 #include <drm/drmP.h> 45 #include <drm/i915_drm.h> 46 #include "i915_drv.h" 47 #include "i915_trace.h" 48 #include "intel_drv.h" 49 #include <linux/shmem_fs.h> 50 #include <linux/slab.h> 51 #include <linux/swap.h> 52 #include <linux/pci.h> 53 #include <linux/dma-buf.h> 54 #include <linux/errno.h> 55 #include <linux/time.h> 56 #include <linux/err.h> 57 #include <asm/param.h> 58 59 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 60 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 61 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 62 unsigned alignment, 63 bool map_and_fenceable, 64 bool nonblocking); 65 static int i915_gem_phys_pwrite(struct drm_device *dev, 66 struct drm_i915_gem_object *obj, 67 struct drm_i915_gem_pwrite *args, 68 struct drm_file *file); 69 70 static void i915_gem_write_fence(struct drm_device *dev, int reg, 71 struct drm_i915_gem_object *obj); 72 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 73 struct drm_i915_fence_reg *fence, 74 bool enable); 75 76 static int i915_gem_inactive_shrink(struct shrinker *shrinker, 77 struct shrink_control *sc); 78 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target); 79 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv); 80 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 81 82 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 83 { 84 if (obj->tiling_mode) 85 i915_gem_release_mmap(obj); 86 87 /* As we do not have an associated fence register, we will force 88 * a tiling change if we ever need to acquire one. 
89 */ 90 obj->fence_dirty = false; 91 obj->fence_reg = I915_FENCE_REG_NONE; 92 } 93 94 /* some bookkeeping */ 95 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 96 size_t size) 97 { 98 dev_priv->mm.object_count++; 99 dev_priv->mm.object_memory += size; 100 } 101 102 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 103 size_t size) 104 { 105 dev_priv->mm.object_count--; 106 dev_priv->mm.object_memory -= size; 107 } 108 109 static int 110 i915_gem_wait_for_error(struct drm_device *dev) 111 { 112 struct drm_i915_private *dev_priv = dev->dev_private; 113 struct completion *x = &dev_priv->error_completion; 114 #ifndef __NetBSD__ 115 unsigned long flags; 116 #endif 117 int ret; 118 119 if (!atomic_read(&dev_priv->mm.wedged)) 120 return 0; 121 122 /* 123 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 124 * userspace. If it takes that long something really bad is going on and 125 * we should simply try to bail out and fail as gracefully as possible. 126 */ 127 ret = wait_for_completion_interruptible_timeout(x, 10*HZ); 128 if (ret == 0) { 129 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 130 return -EIO; 131 } else if (ret < 0) { 132 return ret; 133 } 134 135 if (atomic_read(&dev_priv->mm.wedged)) { 136 /* GPU is hung, bump the completion count to account for 137 * the token we just consumed so that we never hit zero and 138 * end up waiting upon a subsequent completion event that 139 * will never happen. 140 */ 141 #ifdef __NetBSD__ 142 /* XXX Hope it's not a problem that we might wake someone. */ 143 complete(x); 144 #else 145 spin_lock_irqsave(&x->wait.lock, flags); 146 x->done++; 147 spin_unlock_irqrestore(&x->wait.lock, flags); 148 #endif 149 } 150 return 0; 151 } 152 153 int i915_mutex_lock_interruptible(struct drm_device *dev) 154 { 155 int ret; 156 157 ret = i915_gem_wait_for_error(dev); 158 if (ret) 159 return ret; 160 161 ret = mutex_lock_interruptible(&dev->struct_mutex); 162 if (ret) 163 return ret; 164 165 WARN_ON(i915_verify_lists(dev)); 166 return 0; 167 } 168 169 static inline bool 170 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 171 { 172 return obj->gtt_space && !obj->active; 173 } 174 175 int 176 i915_gem_init_ioctl(struct drm_device *dev, void *data, 177 struct drm_file *file) 178 { 179 struct drm_i915_gem_init *args = data; 180 181 if (drm_core_check_feature(dev, DRIVER_MODESET)) 182 return -ENODEV; 183 184 if (args->gtt_start >= args->gtt_end || 185 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 186 return -EINVAL; 187 188 /* GEM with user mode setting was never supported on ilk and later. 
*/ 189 if (INTEL_INFO(dev)->gen >= 5) 190 return -ENODEV; 191 192 mutex_lock(&dev->struct_mutex); 193 i915_gem_init_global_gtt(dev, args->gtt_start, 194 args->gtt_end, args->gtt_end); 195 mutex_unlock(&dev->struct_mutex); 196 197 return 0; 198 } 199 200 int 201 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 202 struct drm_file *file) 203 { 204 struct drm_i915_private *dev_priv = dev->dev_private; 205 struct drm_i915_gem_get_aperture *args = data; 206 struct drm_i915_gem_object *obj; 207 size_t pinned; 208 209 pinned = 0; 210 mutex_lock(&dev->struct_mutex); 211 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) 212 if (obj->pin_count) 213 pinned += obj->gtt_space->size; 214 mutex_unlock(&dev->struct_mutex); 215 216 args->aper_size = dev_priv->mm.gtt_total; 217 args->aper_available_size = args->aper_size - pinned; 218 219 return 0; 220 } 221 222 static int 223 i915_gem_create(struct drm_file *file, 224 struct drm_device *dev, 225 uint64_t size, 226 uint32_t *handle_p) 227 { 228 struct drm_i915_gem_object *obj; 229 int ret; 230 u32 handle; 231 232 size = roundup(size, PAGE_SIZE); 233 if (size == 0) 234 return -EINVAL; 235 236 /* Allocate the new object */ 237 obj = i915_gem_alloc_object(dev, size); 238 if (obj == NULL) 239 return -ENOMEM; 240 241 ret = drm_gem_handle_create(file, &obj->base, &handle); 242 if (ret) { 243 drm_gem_object_release(&obj->base); 244 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 245 kfree(obj); 246 return ret; 247 } 248 249 /* drop reference from allocate - handle holds it now */ 250 drm_gem_object_unreference(&obj->base); 251 trace_i915_gem_object_create(obj); 252 253 *handle_p = handle; 254 return 0; 255 } 256 257 int 258 i915_gem_dumb_create(struct drm_file *file, 259 struct drm_device *dev, 260 struct drm_mode_create_dumb *args) 261 { 262 /* have to work out size/pitch and return them */ 263 #ifdef __NetBSD__ /* ALIGN already means something. */ 264 args->pitch = round_up(args->width * ((args->bpp + 7) / 8), 64); 265 #else 266 args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64); 267 #endif 268 args->size = args->pitch * args->height; 269 return i915_gem_create(file, dev, 270 args->size, &args->handle); 271 } 272 273 int i915_gem_dumb_destroy(struct drm_file *file, 274 struct drm_device *dev, 275 uint32_t handle) 276 { 277 return drm_gem_handle_delete(file, handle); 278 } 279 280 /** 281 * Creates a new mm object and returns a handle to it. 
282 */ 283 int 284 i915_gem_create_ioctl(struct drm_device *dev, void *data, 285 struct drm_file *file) 286 { 287 struct drm_i915_gem_create *args = data; 288 289 return i915_gem_create(file, dev, 290 args->size, &args->handle); 291 } 292 293 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) 294 { 295 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 296 297 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 298 obj->tiling_mode != I915_TILING_NONE; 299 } 300 301 static inline int 302 __copy_to_user_swizzled(char __user *cpu_vaddr, 303 const char *gpu_vaddr, int gpu_offset, 304 int length) 305 { 306 int ret, cpu_offset = 0; 307 308 while (length > 0) { 309 #ifdef __NetBSD__ 310 int cacheline_end = round_up(gpu_offset + 1, 64); 311 #else 312 int cacheline_end = ALIGN(gpu_offset + 1, 64); 313 #endif 314 int this_length = min(cacheline_end - gpu_offset, length); 315 int swizzled_gpu_offset = gpu_offset ^ 64; 316 317 ret = __copy_to_user(cpu_vaddr + cpu_offset, 318 gpu_vaddr + swizzled_gpu_offset, 319 this_length); 320 if (ret) 321 return ret + length; 322 323 cpu_offset += this_length; 324 gpu_offset += this_length; 325 length -= this_length; 326 } 327 328 return 0; 329 } 330 331 static inline int 332 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, 333 const char __user *cpu_vaddr, 334 int length) 335 { 336 int ret, cpu_offset = 0; 337 338 while (length > 0) { 339 #ifdef __NetBSD__ 340 int cacheline_end = round_up(gpu_offset + 1, 64); 341 #else 342 int cacheline_end = ALIGN(gpu_offset + 1, 64); 343 #endif 344 int this_length = min(cacheline_end - gpu_offset, length); 345 int swizzled_gpu_offset = gpu_offset ^ 64; 346 347 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, 348 cpu_vaddr + cpu_offset, 349 this_length); 350 if (ret) 351 return ret + length; 352 353 cpu_offset += this_length; 354 gpu_offset += this_length; 355 length -= this_length; 356 } 357 358 return 0; 359 } 360 361 /* Per-page copy function for the shmem pread fastpath. 362 * Flushes invalid cachelines before reading the target if 363 * needs_clflush is set. */ 364 static int 365 shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, 366 char __user *user_data, 367 bool page_do_bit17_swizzling, bool needs_clflush) 368 { 369 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 370 return -EFAULT; 371 #else 372 char *vaddr; 373 int ret; 374 375 if (unlikely(page_do_bit17_swizzling)) 376 return -EINVAL; 377 378 vaddr = kmap_atomic(page); 379 if (needs_clflush) 380 drm_clflush_virt_range(vaddr + shmem_page_offset, 381 page_length); 382 ret = __copy_to_user_inatomic(user_data, 383 vaddr + shmem_page_offset, 384 page_length); 385 kunmap_atomic(vaddr); 386 387 return ret ? -EFAULT : 0; 388 #endif 389 } 390 391 static void 392 shmem_clflush_swizzled_range(char *addr, unsigned long length, 393 bool swizzled) 394 { 395 if (unlikely(swizzled)) { 396 unsigned long start = (unsigned long) addr; 397 unsigned long end = (unsigned long) addr + length; 398 399 /* For swizzling simply ensure that we always flush both 400 * channels. Lame, but simple and it works. Swizzled 401 * pwrite/pread is far from a hotpath - current userspace 402 * doesn't use it at all. 
*/ 403 start = round_down(start, 128); 404 end = round_up(end, 128); 405 406 drm_clflush_virt_range((void *)start, end - start); 407 } else { 408 drm_clflush_virt_range(addr, length); 409 } 410 411 } 412 413 /* Only difference to the fast-path function is that this can handle bit17 414 * and uses non-atomic copy and kmap functions. */ 415 static int 416 shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, 417 char __user *user_data, 418 bool page_do_bit17_swizzling, bool needs_clflush) 419 { 420 char *vaddr; 421 int ret; 422 423 vaddr = kmap(page); 424 if (needs_clflush) 425 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 426 page_length, 427 page_do_bit17_swizzling); 428 429 if (page_do_bit17_swizzling) 430 ret = __copy_to_user_swizzled(user_data, 431 vaddr, shmem_page_offset, 432 page_length); 433 else 434 ret = __copy_to_user(user_data, 435 vaddr + shmem_page_offset, 436 page_length); 437 kunmap(page); 438 439 return ret ? - EFAULT : 0; 440 } 441 442 static int 443 i915_gem_shmem_pread(struct drm_device *dev, 444 struct drm_i915_gem_object *obj, 445 struct drm_i915_gem_pread *args, 446 struct drm_file *file) 447 { 448 char __user *user_data; 449 ssize_t remain; 450 loff_t offset; 451 int shmem_page_offset, page_length, ret = 0; 452 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 453 int hit_slowpath = 0; 454 #ifndef __NetBSD__ /* XXX */ 455 int prefaulted = 0; 456 #endif 457 int needs_clflush = 0; 458 #ifndef __NetBSD__ 459 struct scatterlist *sg; 460 int i; 461 #endif 462 463 user_data = (char __user *) (uintptr_t) args->data_ptr; 464 remain = args->size; 465 466 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 467 468 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { 469 /* If we're not in the cpu read domain, set ourself into the gtt 470 * read domain and manually flush cachelines (if required). This 471 * optimizes for the case when the gpu will dirty the data 472 * anyway again before the next pread happens. */ 473 if (obj->cache_level == I915_CACHE_NONE) 474 needs_clflush = 1; 475 if (obj->gtt_space) { 476 ret = i915_gem_object_set_to_gtt_domain(obj, false); 477 if (ret) 478 return ret; 479 } 480 } 481 482 ret = i915_gem_object_get_pages(obj); 483 if (ret) 484 return ret; 485 486 i915_gem_object_pin_pages(obj); 487 488 offset = args->offset; 489 490 #ifdef __NetBSD__ 491 /* 492 * XXX This is a big #ifdef with a lot of duplicated code, but 493 * factoring out the loop head -- which is all that 494 * substantially differs -- is probably more trouble than it's 495 * worth at the moment. 496 */ 497 while (0 < remain) { 498 /* Get the next page. */ 499 shmem_page_offset = offset_in_page(offset); 500 KASSERT(shmem_page_offset < PAGE_SIZE); 501 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 502 struct page *const page = i915_gem_object_get_page(obj, 503 atop(offset)); 504 505 /* Decide whether to swizzle bit 17. */ 506 page_do_bit17_swizzling = obj_do_bit17_swizzling && 507 (page_to_phys(page) & (1 << 17)) != 0; 508 509 /* Try the fast path. */ 510 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 511 user_data, page_do_bit17_swizzling, needs_clflush); 512 if (ret == 0) 513 goto next_page; 514 515 /* Fast path failed. Try the slow path. 
*/ 516 hit_slowpath = 1; 517 mutex_unlock(&dev->struct_mutex); 518 /* XXX prefault */ 519 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 520 user_data, page_do_bit17_swizzling, needs_clflush); 521 mutex_lock(&dev->struct_mutex); 522 523 next_page: 524 /* XXX mark page accessed */ 525 if (ret) 526 goto out; 527 528 KASSERT(page_length <= remain); 529 remain -= page_length; 530 user_data += page_length; 531 offset += page_length; 532 } 533 #else 534 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 535 struct page *page; 536 537 if (i < offset >> PAGE_SHIFT) 538 continue; 539 540 if (remain <= 0) 541 break; 542 543 /* Operation in this page 544 * 545 * shmem_page_offset = offset within page in shmem file 546 * page_length = bytes to copy for this page 547 */ 548 shmem_page_offset = offset_in_page(offset); 549 page_length = remain; 550 if ((shmem_page_offset + page_length) > PAGE_SIZE) 551 page_length = PAGE_SIZE - shmem_page_offset; 552 553 page = sg_page(sg); 554 page_do_bit17_swizzling = obj_do_bit17_swizzling && 555 (page_to_phys(page) & (1 << 17)) != 0; 556 557 ret = shmem_pread_fast(page, shmem_page_offset, page_length, 558 user_data, page_do_bit17_swizzling, 559 needs_clflush); 560 if (ret == 0) 561 goto next_page; 562 563 hit_slowpath = 1; 564 mutex_unlock(&dev->struct_mutex); 565 566 if (!prefaulted) { 567 ret = fault_in_multipages_writeable(user_data, remain); 568 /* Userspace is tricking us, but we've already clobbered 569 * its pages with the prefault and promised to write the 570 * data up to the first fault. Hence ignore any errors 571 * and just continue. */ 572 (void)ret; 573 prefaulted = 1; 574 } 575 576 ret = shmem_pread_slow(page, shmem_page_offset, page_length, 577 user_data, page_do_bit17_swizzling, 578 needs_clflush); 579 580 mutex_lock(&dev->struct_mutex); 581 582 next_page: 583 mark_page_accessed(page); 584 585 if (ret) 586 goto out; 587 588 remain -= page_length; 589 user_data += page_length; 590 offset += page_length; 591 } 592 #endif 593 594 out: 595 i915_gem_object_unpin_pages(obj); 596 597 if (hit_slowpath) { 598 /* Fixup: Kill any reinstated backing storage pages */ 599 if (obj->madv == __I915_MADV_PURGED) 600 i915_gem_object_truncate(obj); 601 } 602 603 return ret; 604 } 605 606 /** 607 * Reads data from the object referenced by handle. 608 * 609 * On error, the contents of *data are undefined. 610 */ 611 int 612 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 613 struct drm_file *file) 614 { 615 struct drm_i915_gem_pread *args = data; 616 struct drm_i915_gem_object *obj; 617 int ret = 0; 618 619 if (args->size == 0) 620 return 0; 621 622 if (!access_ok(VERIFY_WRITE, 623 (char __user *)(uintptr_t)args->data_ptr, 624 args->size)) 625 return -EFAULT; 626 627 ret = i915_mutex_lock_interruptible(dev); 628 if (ret) 629 return ret; 630 631 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 632 if (&obj->base == NULL) { 633 ret = -ENOENT; 634 goto unlock; 635 } 636 637 /* Bounds check source. */ 638 if (args->offset > obj->base.size || 639 args->size > obj->base.size - args->offset) { 640 ret = -EINVAL; 641 goto out; 642 } 643 644 #ifndef __NetBSD__ /* XXX drm prime */ 645 /* prime objects have no backing filp to GEM pread/pwrite 646 * pages from. 
647 */ 648 if (!obj->base.filp) { 649 ret = -EINVAL; 650 goto out; 651 } 652 #endif 653 654 trace_i915_gem_object_pread(obj, args->offset, args->size); 655 656 ret = i915_gem_shmem_pread(dev, obj, args, file); 657 658 out: 659 drm_gem_object_unreference(&obj->base); 660 unlock: 661 mutex_unlock(&dev->struct_mutex); 662 return ret; 663 } 664 665 /* This is the fast write path which cannot handle 666 * page faults in the source data 667 */ 668 669 static inline int 670 fast_user_write(struct io_mapping *mapping, 671 loff_t page_base, int page_offset, 672 char __user *user_data, 673 int length) 674 { 675 #ifdef __NetBSD__ /* XXX atomic shmem fast path */ 676 return -EFAULT; 677 #else 678 void __iomem *vaddr_atomic; 679 void *vaddr; 680 unsigned long unwritten; 681 682 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 683 /* We can use the cpu mem copy function because this is X86. */ 684 vaddr = (void __force*)vaddr_atomic + page_offset; 685 unwritten = __copy_from_user_inatomic_nocache(vaddr, 686 user_data, length); 687 io_mapping_unmap_atomic(vaddr_atomic); 688 return unwritten; 689 #endif 690 } 691 692 /** 693 * This is the fast pwrite path, where we copy the data directly from the 694 * user into the GTT, uncached. 695 */ 696 static int 697 i915_gem_gtt_pwrite_fast(struct drm_device *dev, 698 struct drm_i915_gem_object *obj, 699 struct drm_i915_gem_pwrite *args, 700 struct drm_file *file) 701 { 702 drm_i915_private_t *dev_priv = dev->dev_private; 703 ssize_t remain; 704 loff_t offset, page_base; 705 char __user *user_data; 706 int page_offset, page_length, ret; 707 708 ret = i915_gem_object_pin(obj, 0, true, true); 709 if (ret) 710 goto out; 711 712 ret = i915_gem_object_set_to_gtt_domain(obj, true); 713 if (ret) 714 goto out_unpin; 715 716 ret = i915_gem_object_put_fence(obj); 717 if (ret) 718 goto out_unpin; 719 720 user_data = (char __user *) (uintptr_t) args->data_ptr; 721 remain = args->size; 722 723 offset = obj->gtt_offset + args->offset; 724 725 while (remain > 0) { 726 /* Operation in this page 727 * 728 * page_base = page offset within aperture 729 * page_offset = offset within page 730 * page_length = bytes to copy for this page 731 */ 732 page_base = offset & PAGE_MASK; 733 page_offset = offset_in_page(offset); 734 page_length = remain; 735 if ((page_offset + remain) > PAGE_SIZE) 736 page_length = PAGE_SIZE - page_offset; 737 738 /* If we get a fault while copying data, then (presumably) our 739 * source page isn't available. Return the error and we'll 740 * retry in the slow path. 741 */ 742 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 743 page_offset, user_data, page_length)) { 744 ret = -EFAULT; 745 goto out_unpin; 746 } 747 748 remain -= page_length; 749 user_data += page_length; 750 offset += page_length; 751 } 752 753 out_unpin: 754 i915_gem_object_unpin(obj); 755 out: 756 return ret; 757 } 758 759 /* Per-page copy function for the shmem pwrite fastpath. 760 * Flushes invalid cachelines before writing to the target if 761 * needs_clflush_before is set and flushes out any written cachelines after 762 * writing if needs_clflush is set. 
*/ 763 static int 764 shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, 765 char __user *user_data, 766 bool page_do_bit17_swizzling, 767 bool needs_clflush_before, 768 bool needs_clflush_after) 769 { 770 #ifdef __NetBSD__ 771 return -EFAULT; 772 #else 773 char *vaddr; 774 int ret; 775 776 if (unlikely(page_do_bit17_swizzling)) 777 return -EINVAL; 778 779 vaddr = kmap_atomic(page); 780 if (needs_clflush_before) 781 drm_clflush_virt_range(vaddr + shmem_page_offset, 782 page_length); 783 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, 784 user_data, 785 page_length); 786 if (needs_clflush_after) 787 drm_clflush_virt_range(vaddr + shmem_page_offset, 788 page_length); 789 kunmap_atomic(vaddr); 790 791 return ret ? -EFAULT : 0; 792 #endif 793 } 794 795 /* Only difference to the fast-path function is that this can handle bit17 796 * and uses non-atomic copy and kmap functions. */ 797 static int 798 shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, 799 char __user *user_data, 800 bool page_do_bit17_swizzling, 801 bool needs_clflush_before, 802 bool needs_clflush_after) 803 { 804 char *vaddr; 805 int ret; 806 807 vaddr = kmap(page); 808 if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) 809 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 810 page_length, 811 page_do_bit17_swizzling); 812 if (page_do_bit17_swizzling) 813 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 814 user_data, 815 page_length); 816 else 817 ret = __copy_from_user(vaddr + shmem_page_offset, 818 user_data, 819 page_length); 820 if (needs_clflush_after) 821 shmem_clflush_swizzled_range(vaddr + shmem_page_offset, 822 page_length, 823 page_do_bit17_swizzling); 824 kunmap(page); 825 826 return ret ? -EFAULT : 0; 827 } 828 829 static int 830 i915_gem_shmem_pwrite(struct drm_device *dev, 831 struct drm_i915_gem_object *obj, 832 struct drm_i915_gem_pwrite *args, 833 struct drm_file *file) 834 { 835 ssize_t remain; 836 loff_t offset; 837 char __user *user_data; 838 int shmem_page_offset, page_length, ret = 0; 839 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 840 int hit_slowpath = 0; 841 int needs_clflush_after = 0; 842 int needs_clflush_before = 0; 843 #ifndef __NetBSD__ 844 int i; 845 struct scatterlist *sg; 846 #endif 847 848 user_data = (char __user *) (uintptr_t) args->data_ptr; 849 remain = args->size; 850 851 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 852 853 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 854 /* If we're not in the cpu write domain, set ourself into the gtt 855 * write domain and manually flush cachelines (if required). This 856 * optimizes for the case when the gpu will use the data 857 * right away and we therefore have to clflush anyway. */ 858 if (obj->cache_level == I915_CACHE_NONE) 859 needs_clflush_after = 1; 860 if (obj->gtt_space) { 861 ret = i915_gem_object_set_to_gtt_domain(obj, true); 862 if (ret) 863 return ret; 864 } 865 } 866 /* Same trick applies for invalidate partially written cachelines before 867 * writing. */ 868 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) 869 && obj->cache_level == I915_CACHE_NONE) 870 needs_clflush_before = 1; 871 872 ret = i915_gem_object_get_pages(obj); 873 if (ret) 874 return ret; 875 876 i915_gem_object_pin_pages(obj); 877 878 offset = args->offset; 879 obj->dirty = 1; 880 881 #ifdef __NetBSD__ 882 while (0 < remain) { 883 /* Get the next page. 
*/ 884 shmem_page_offset = offset_in_page(offset); 885 KASSERT(shmem_page_offset < PAGE_SIZE); 886 page_length = MIN(remain, (PAGE_SIZE - shmem_page_offset)); 887 struct page *const page = i915_gem_object_get_page(obj, 888 atop(offset)); 889 890 /* Decide whether to flush the cache or swizzle bit 17. */ 891 const bool partial_cacheline_write = needs_clflush_before && 892 ((shmem_page_offset | page_length) 893 & (cpu_info_primary.ci_cflush_lsize - 1)); 894 page_do_bit17_swizzling = obj_do_bit17_swizzling && 895 (page_to_phys(page) & (1 << 17)) != 0; 896 897 /* Try the fast path. */ 898 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 899 user_data, page_do_bit17_swizzling, 900 partial_cacheline_write, needs_clflush_after); 901 if (ret == 0) 902 goto next_page; 903 904 /* Fast path failed. Try the slow path. */ 905 hit_slowpath = 1; 906 mutex_unlock(&dev->struct_mutex); 907 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 908 user_data, page_do_bit17_swizzling, 909 partial_cacheline_write, needs_clflush_after); 910 mutex_lock(&dev->struct_mutex); 911 912 next_page: 913 page->p_vmp.flags &= ~PG_CLEAN; 914 /* XXX mark page accessed */ 915 if (ret) 916 goto out; 917 918 KASSERT(page_length <= remain); 919 remain -= page_length; 920 user_data += page_length; 921 offset += page_length; 922 } 923 #else 924 for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) { 925 struct page *page; 926 int partial_cacheline_write; 927 928 if (i < offset >> PAGE_SHIFT) 929 continue; 930 931 if (remain <= 0) 932 break; 933 934 /* Operation in this page 935 * 936 * shmem_page_offset = offset within page in shmem file 937 * page_length = bytes to copy for this page 938 */ 939 shmem_page_offset = offset_in_page(offset); 940 941 page_length = remain; 942 if ((shmem_page_offset + page_length) > PAGE_SIZE) 943 page_length = PAGE_SIZE - shmem_page_offset; 944 945 /* If we don't overwrite a cacheline completely we need to be 946 * careful to have up-to-date data by first clflushing. Don't 947 * overcomplicate things and flush the entire patch. */ 948 partial_cacheline_write = needs_clflush_before && 949 ((shmem_page_offset | page_length) 950 & (boot_cpu_data.x86_clflush_size - 1)); 951 952 page = sg_page(sg); 953 page_do_bit17_swizzling = obj_do_bit17_swizzling && 954 (page_to_phys(page) & (1 << 17)) != 0; 955 956 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, 957 user_data, page_do_bit17_swizzling, 958 partial_cacheline_write, 959 needs_clflush_after); 960 if (ret == 0) 961 goto next_page; 962 963 hit_slowpath = 1; 964 mutex_unlock(&dev->struct_mutex); 965 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, 966 user_data, page_do_bit17_swizzling, 967 partial_cacheline_write, 968 needs_clflush_after); 969 970 mutex_lock(&dev->struct_mutex); 971 972 next_page: 973 set_page_dirty(page); 974 mark_page_accessed(page); 975 976 if (ret) 977 goto out; 978 979 remain -= page_length; 980 user_data += page_length; 981 offset += page_length; 982 } 983 #endif 984 985 out: 986 i915_gem_object_unpin_pages(obj); 987 988 if (hit_slowpath) { 989 /* Fixup: Kill any reinstated backing storage pages */ 990 if (obj->madv == __I915_MADV_PURGED) 991 i915_gem_object_truncate(obj); 992 /* and flush dirty cachelines in case the object isn't in the cpu write 993 * domain anymore. 
*/ 994 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 995 i915_gem_clflush_object(obj); 996 i915_gem_chipset_flush(dev); 997 } 998 } 999 1000 if (needs_clflush_after) 1001 i915_gem_chipset_flush(dev); 1002 1003 return ret; 1004 } 1005 1006 /** 1007 * Writes data to the object referenced by handle. 1008 * 1009 * On error, the contents of the buffer that were to be modified are undefined. 1010 */ 1011 int 1012 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 1013 struct drm_file *file) 1014 { 1015 struct drm_i915_gem_pwrite *args = data; 1016 struct drm_i915_gem_object *obj; 1017 int ret; 1018 1019 if (args->size == 0) 1020 return 0; 1021 1022 if (!access_ok(VERIFY_READ, 1023 (char __user *)(uintptr_t)args->data_ptr, 1024 args->size)) 1025 return -EFAULT; 1026 1027 #ifndef __NetBSD__ /* XXX prefault */ 1028 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr, 1029 args->size); 1030 if (ret) 1031 return -EFAULT; 1032 #endif 1033 1034 ret = i915_mutex_lock_interruptible(dev); 1035 if (ret) 1036 return ret; 1037 1038 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1039 if (&obj->base == NULL) { 1040 ret = -ENOENT; 1041 goto unlock; 1042 } 1043 1044 /* Bounds check destination. */ 1045 if (args->offset > obj->base.size || 1046 args->size > obj->base.size - args->offset) { 1047 ret = -EINVAL; 1048 goto out; 1049 } 1050 1051 #ifndef __NetBSD__ /* XXX drm prime */ 1052 /* prime objects have no backing filp to GEM pread/pwrite 1053 * pages from. 1054 */ 1055 if (!obj->base.filp) { 1056 ret = -EINVAL; 1057 goto out; 1058 } 1059 #endif 1060 1061 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 1062 1063 ret = -EFAULT; 1064 /* We can only do the GTT pwrite on untiled buffers, as otherwise 1065 * it would end up going through the fenced access, and we'll get 1066 * different detiling behavior between reading and writing. 1067 * pread/pwrite currently are reading and writing from the CPU 1068 * perspective, requiring manual detiling by the client. 1069 */ 1070 if (obj->phys_obj) { 1071 ret = i915_gem_phys_pwrite(dev, obj, args, file); 1072 goto out; 1073 } 1074 1075 if (obj->cache_level == I915_CACHE_NONE && 1076 obj->tiling_mode == I915_TILING_NONE && 1077 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 1078 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 1079 /* Note that the gtt paths might fail with non-page-backed user 1080 * pointers (e.g. gtt mappings when moving data between 1081 * textures). Fallback to the shmem path in that case. */ 1082 } 1083 1084 if (ret == -EFAULT || ret == -ENOSPC) 1085 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 1086 1087 out: 1088 drm_gem_object_unreference(&obj->base); 1089 unlock: 1090 mutex_unlock(&dev->struct_mutex); 1091 return ret; 1092 } 1093 1094 int 1095 i915_gem_check_wedge(struct drm_i915_private *dev_priv, 1096 bool interruptible) 1097 { 1098 if (atomic_read(&dev_priv->mm.wedged)) { 1099 struct completion *x = &dev_priv->error_completion; 1100 bool recovery_complete; 1101 #ifndef __NetBSD__ 1102 unsigned long flags; 1103 #endif 1104 1105 #ifdef __NetBSD__ 1106 /* 1107 * XXX This is a horrible kludge. Reading internal 1108 * fields is no good, nor is reading them unlocked, and 1109 * neither is locking it and then unlocking it before 1110 * making a decision. 1111 */ 1112 recovery_complete = x->c_done > 0; 1113 #else 1114 /* Give the error handler a chance to run. 
*/ 1115 spin_lock_irqsave(&x->wait.lock, flags); 1116 recovery_complete = x->done > 0; 1117 spin_unlock_irqrestore(&x->wait.lock, flags); 1118 #endif 1119 1120 /* Non-interruptible callers can't handle -EAGAIN, hence return 1121 * -EIO unconditionally for these. */ 1122 if (!interruptible) 1123 return -EIO; 1124 1125 /* Recovery complete, but still wedged means reset failure. */ 1126 if (recovery_complete) 1127 return -EIO; 1128 1129 return -EAGAIN; 1130 } 1131 1132 return 0; 1133 } 1134 1135 /* 1136 * Compare seqno against outstanding lazy request. Emit a request if they are 1137 * equal. 1138 */ 1139 static int 1140 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 1141 { 1142 int ret; 1143 1144 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); 1145 1146 ret = 0; 1147 if (seqno == ring->outstanding_lazy_request) 1148 ret = i915_add_request(ring, NULL, NULL); 1149 1150 return ret; 1151 } 1152 1153 /** 1154 * __wait_seqno - wait until execution of seqno has finished 1155 * @ring: the ring expected to report seqno 1156 * @seqno: duh! 1157 * @interruptible: do an interruptible wait (normally yes) 1158 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 1159 * 1160 * Returns 0 if the seqno was found within the alloted time. Else returns the 1161 * errno with remaining time filled in timeout argument. 1162 */ 1163 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 1164 bool interruptible, struct timespec *timeout) 1165 { 1166 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1167 struct timespec before, now, wait_time={1,0}; 1168 unsigned long timeout_jiffies; 1169 long end; 1170 bool wait_forever = true; 1171 int ret; 1172 1173 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 1174 return 0; 1175 1176 trace_i915_gem_request_wait_begin(ring, seqno); 1177 1178 if (timeout != NULL) { 1179 wait_time = *timeout; 1180 wait_forever = false; 1181 } 1182 1183 timeout_jiffies = timespec_to_jiffies(&wait_time); 1184 1185 if (WARN_ON(!ring->irq_get(ring))) 1186 return -ENODEV; 1187 1188 /* Record current time in case interrupted by signal, or wedged * */ 1189 getrawmonotonic(&before); 1190 1191 #define EXIT_COND \ 1192 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ 1193 atomic_read(&dev_priv->mm.wedged)) 1194 do { 1195 #ifdef __NetBSD__ 1196 unsigned long flags; 1197 spin_lock_irqsave(&dev_priv->irq_lock, flags); 1198 if (interruptible) 1199 DRM_SPIN_TIMED_WAIT_UNTIL(end, &ring->irq_queue, 1200 &dev_priv->irq_lock, 1201 timeout_jiffies, 1202 EXIT_COND); 1203 else 1204 DRM_SPIN_TIMED_WAIT_NOINTR_UNTIL(end, &ring->irq_queue, 1205 &dev_priv->irq_lock, 1206 timeout_jiffies, 1207 EXIT_COND); 1208 spin_unlock_irqrestore(&dev_priv->irq_lock, flags); 1209 #else 1210 if (interruptible) 1211 end = wait_event_interruptible_timeout(ring->irq_queue, 1212 EXIT_COND, 1213 timeout_jiffies); 1214 else 1215 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 1216 timeout_jiffies); 1217 1218 #endif 1219 ret = i915_gem_check_wedge(dev_priv, interruptible); 1220 if (ret) 1221 end = ret; 1222 } while (end == 0 && wait_forever); 1223 1224 getrawmonotonic(&now); 1225 1226 ring->irq_put(ring); 1227 trace_i915_gem_request_wait_end(ring, seqno); 1228 #undef EXIT_COND 1229 1230 if (timeout) { 1231 struct timespec sleep_time = timespec_sub(now, before); 1232 *timeout = timespec_sub(*timeout, sleep_time); 1233 } 1234 1235 switch (end) { 1236 case -EIO: 1237 case -EAGAIN: /* Wedged */ 1238 case -ERESTARTSYS: /* Signal */ 1239 case -EINTR: 1240 
return (int)end; 1241 case 0: /* Timeout */ 1242 if (timeout) 1243 set_normalized_timespec(timeout, 0, 0); 1244 return -ETIME; 1245 default: /* Completed */ 1246 WARN_ON(end < 0); /* We're not aware of other errors */ 1247 return 0; 1248 } 1249 } 1250 1251 /** 1252 * Waits for a sequence number to be signaled, and cleans up the 1253 * request and object lists appropriately for that event. 1254 */ 1255 int 1256 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) 1257 { 1258 struct drm_device *dev = ring->dev; 1259 struct drm_i915_private *dev_priv = dev->dev_private; 1260 bool interruptible = dev_priv->mm.interruptible; 1261 int ret; 1262 1263 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1264 BUG_ON(seqno == 0); 1265 1266 ret = i915_gem_check_wedge(dev_priv, interruptible); 1267 if (ret) 1268 return ret; 1269 1270 ret = i915_gem_check_olr(ring, seqno); 1271 if (ret) 1272 return ret; 1273 1274 return __wait_seqno(ring, seqno, interruptible, NULL); 1275 } 1276 1277 /** 1278 * Ensures that all rendering to the object has completed and the object is 1279 * safe to unbind from the GTT or access from the CPU. 1280 */ 1281 static __must_check int 1282 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 1283 bool readonly) 1284 { 1285 struct intel_ring_buffer *ring = obj->ring; 1286 u32 seqno; 1287 int ret; 1288 1289 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1290 if (seqno == 0) 1291 return 0; 1292 1293 ret = i915_wait_seqno(ring, seqno); 1294 if (ret) 1295 return ret; 1296 1297 i915_gem_retire_requests_ring(ring); 1298 1299 /* Manually manage the write flush as we may have not yet 1300 * retired the buffer. 1301 */ 1302 if (obj->last_write_seqno && 1303 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1304 obj->last_write_seqno = 0; 1305 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1306 } 1307 1308 return 0; 1309 } 1310 1311 /* A nonblocking variant of the above wait. This is a highly dangerous routine 1312 * as the object state may change during this call. 1313 */ 1314 static __must_check int 1315 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 1316 bool readonly) 1317 { 1318 struct drm_device *dev = obj->base.dev; 1319 struct drm_i915_private *dev_priv = dev->dev_private; 1320 struct intel_ring_buffer *ring = obj->ring; 1321 u32 seqno; 1322 int ret; 1323 1324 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1325 BUG_ON(!dev_priv->mm.interruptible); 1326 1327 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 1328 if (seqno == 0) 1329 return 0; 1330 1331 ret = i915_gem_check_wedge(dev_priv, true); 1332 if (ret) 1333 return ret; 1334 1335 ret = i915_gem_check_olr(ring, seqno); 1336 if (ret) 1337 return ret; 1338 1339 mutex_unlock(&dev->struct_mutex); 1340 ret = __wait_seqno(ring, seqno, true, NULL); 1341 mutex_lock(&dev->struct_mutex); 1342 1343 i915_gem_retire_requests_ring(ring); 1344 1345 /* Manually manage the write flush as we may have not yet 1346 * retired the buffer. 1347 */ 1348 if (obj->last_write_seqno && 1349 i915_seqno_passed(seqno, obj->last_write_seqno)) { 1350 obj->last_write_seqno = 0; 1351 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1352 } 1353 1354 return ret; 1355 } 1356 1357 /** 1358 * Called when user space prepares to use an object with the CPU, either 1359 * through the mmap ioctl's mapping or a GTT mapping. 
1360 */ 1361 int 1362 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 1363 struct drm_file *file) 1364 { 1365 struct drm_i915_gem_set_domain *args = data; 1366 struct drm_i915_gem_object *obj; 1367 uint32_t read_domains = args->read_domains; 1368 uint32_t write_domain = args->write_domain; 1369 int ret; 1370 1371 /* Only handle setting domains to types used by the CPU. */ 1372 if (write_domain & I915_GEM_GPU_DOMAINS) 1373 return -EINVAL; 1374 1375 if (read_domains & I915_GEM_GPU_DOMAINS) 1376 return -EINVAL; 1377 1378 /* Having something in the write domain implies it's in the read 1379 * domain, and only that read domain. Enforce that in the request. 1380 */ 1381 if (write_domain != 0 && read_domains != write_domain) 1382 return -EINVAL; 1383 1384 ret = i915_mutex_lock_interruptible(dev); 1385 if (ret) 1386 return ret; 1387 1388 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1389 if (&obj->base == NULL) { 1390 ret = -ENOENT; 1391 goto unlock; 1392 } 1393 1394 /* Try to flush the object off the GPU without holding the lock. 1395 * We will repeat the flush holding the lock in the normal manner 1396 * to catch cases where we are gazumped. 1397 */ 1398 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 1399 if (ret) 1400 goto unref; 1401 1402 if (read_domains & I915_GEM_DOMAIN_GTT) { 1403 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 1404 1405 /* Silently promote "you're not bound, there was nothing to do" 1406 * to success, since the client was just asking us to 1407 * make sure everything was done. 1408 */ 1409 if (ret == -EINVAL) 1410 ret = 0; 1411 } else { 1412 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 1413 } 1414 1415 unref: 1416 drm_gem_object_unreference(&obj->base); 1417 unlock: 1418 mutex_unlock(&dev->struct_mutex); 1419 return ret; 1420 } 1421 1422 /** 1423 * Called when user space has done writes to this buffer 1424 */ 1425 int 1426 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 1427 struct drm_file *file) 1428 { 1429 struct drm_i915_gem_sw_finish *args = data; 1430 struct drm_i915_gem_object *obj; 1431 int ret = 0; 1432 1433 ret = i915_mutex_lock_interruptible(dev); 1434 if (ret) 1435 return ret; 1436 1437 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 1438 if (&obj->base == NULL) { 1439 ret = -ENOENT; 1440 goto unlock; 1441 } 1442 1443 /* Pinned buffers may be scanout, so flush the cache */ 1444 if (obj->pin_count) 1445 i915_gem_object_flush_cpu_write_domain(obj); 1446 1447 drm_gem_object_unreference(&obj->base); 1448 unlock: 1449 mutex_unlock(&dev->struct_mutex); 1450 return ret; 1451 } 1452 1453 /** 1454 * Maps the contents of an object, returning the address it is mapped 1455 * into. 1456 * 1457 * While the mapping holds a reference on the contents of the object, it doesn't 1458 * imply a ref on the object itself. 1459 */ 1460 int 1461 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 1462 struct drm_file *file) 1463 { 1464 struct drm_i915_gem_mmap *args = data; 1465 struct drm_gem_object *obj; 1466 unsigned long addr; 1467 #ifdef __NetBSD__ 1468 int ret; 1469 #endif 1470 1471 obj = drm_gem_object_lookup(dev, file, args->handle); 1472 if (obj == NULL) 1473 return -ENOENT; 1474 1475 #ifndef __NetBSD__ /* XXX drm prime */ 1476 /* prime objects have no backing filp to GEM mmap 1477 * pages from. 
1478 */ 1479 if (!obj->filp) { 1480 drm_gem_object_unreference_unlocked(obj); 1481 return -EINVAL; 1482 } 1483 #endif 1484 1485 #ifdef __NetBSD__ 1486 addr = (*curproc->p_emul->e_vm_default_addr)(curproc, 1487 (vaddr_t)curproc->p_vmspace->vm_daddr, args->size); 1488 /* XXX errno NetBSD->Linux */ 1489 ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, args->size, 1490 obj->gemo_shm_uao, args->offset, 0, 1491 UVM_MAPFLAG((VM_PROT_READ | VM_PROT_WRITE), 1492 (VM_PROT_READ | VM_PROT_WRITE), UVM_INH_COPY, UVM_ADV_NORMAL, 1493 UVM_FLAG_COPYONW)); 1494 if (ret) 1495 return ret; 1496 #else 1497 addr = vm_mmap(obj->filp, 0, args->size, 1498 PROT_READ | PROT_WRITE, MAP_SHARED, 1499 args->offset); 1500 drm_gem_object_unreference_unlocked(obj); 1501 if (IS_ERR((void *)addr)) 1502 return addr; 1503 #endif 1504 1505 args->addr_ptr = (uint64_t) addr; 1506 1507 return 0; 1508 } 1509 1510 #ifdef __NetBSD__ /* XXX gem gtt fault */ 1511 static int i915_udv_fault(struct uvm_faultinfo *, vaddr_t, 1512 struct vm_page **, int, int, vm_prot_t, int, paddr_t); 1513 1514 int 1515 i915_gem_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1516 int npages, int centeridx, vm_prot_t access_type, int flags) 1517 { 1518 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1519 struct drm_gem_object *gem_obj = 1520 container_of(uobj, struct drm_gem_object, gemo_uvmobj); 1521 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 1522 struct drm_device *dev = obj->base.dev; 1523 struct drm_i915_private *dev_priv = dev->dev_private; 1524 pgoff_t page_offset; 1525 int ret = 0; 1526 bool write = ISSET(access_type, VM_PROT_WRITE)? 1 : 0; 1527 1528 page_offset = (ufi->entry->offset + (vaddr - ufi->entry->start)) >> 1529 PAGE_SHIFT; 1530 1531 ret = i915_mutex_lock_interruptible(dev); 1532 if (ret) 1533 goto out; 1534 1535 trace_i915_gem_object_fault(obj, page_offset, true, write); 1536 1537 /* Now bind it into the GTT if needed */ 1538 ret = i915_gem_object_pin(obj, 0, true, false); 1539 if (ret) 1540 goto unlock; 1541 1542 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1543 if (ret) 1544 goto unpin; 1545 1546 ret = i915_gem_object_get_fence(obj); 1547 if (ret) 1548 goto unpin; 1549 1550 obj->fault_mappable = true; 1551 1552 /* Finally, remap it using the new GTT offset */ 1553 /* XXX errno NetBSD->Linux */ 1554 ret = -i915_udv_fault(ufi, vaddr, pps, npages, centeridx, access_type, 1555 flags, (dev_priv->mm.gtt_base_addr + obj->gtt_offset)); 1556 unpin: 1557 i915_gem_object_unpin(obj); 1558 unlock: 1559 mutex_unlock(&dev->struct_mutex); 1560 out: 1561 return ret; 1562 } 1563 1564 /* 1565 * XXX i915_udv_fault is copypasta of udv_fault from uvm_device.c. 1566 * 1567 * XXX pmap_enter_default instead of pmap_enter because of a problem 1568 * with using weak aliases in kernel modules or something. 
1569 */ 1570 int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, unsigned); 1571 1572 static int 1573 i915_udv_fault(struct uvm_faultinfo *ufi, vaddr_t vaddr, struct vm_page **pps, 1574 int npages, int centeridx, vm_prot_t access_type, int flags, 1575 paddr_t gtt_paddr) 1576 { 1577 struct vm_map_entry *entry = ufi->entry; 1578 struct uvm_object *uobj = entry->object.uvm_obj; 1579 vaddr_t curr_va; 1580 off_t curr_offset; 1581 paddr_t paddr; 1582 u_int mmapflags; 1583 int lcv, retval; 1584 vm_prot_t mapprot; 1585 UVMHIST_FUNC("i915_udv_fault"); UVMHIST_CALLED(maphist); 1586 UVMHIST_LOG(maphist," flags=%d", flags,0,0,0); 1587 1588 /* 1589 * we do not allow device mappings to be mapped copy-on-write 1590 * so we kill any attempt to do so here. 1591 */ 1592 1593 if (UVM_ET_ISCOPYONWRITE(entry)) { 1594 UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", 1595 entry->etype, 0,0,0); 1596 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); 1597 return(EIO); 1598 } 1599 1600 /* 1601 * now we must determine the offset in udv to use and the VA to 1602 * use for pmap_enter. note that we always use orig_map's pmap 1603 * for pmap_enter (even if we have a submap). since virtual 1604 * addresses in a submap must match the main map, this is ok. 1605 */ 1606 1607 /* udv offset = (offset from start of entry) + entry's offset */ 1608 curr_offset = entry->offset + (vaddr - entry->start); 1609 /* pmap va = vaddr (virtual address of pps[0]) */ 1610 curr_va = vaddr; 1611 1612 /* 1613 * loop over the page range entering in as needed 1614 */ 1615 1616 retval = 0; 1617 for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, 1618 curr_va += PAGE_SIZE) { 1619 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 1620 continue; 1621 1622 if (pps[lcv] == PGO_DONTCARE) 1623 continue; 1624 1625 paddr = (gtt_paddr + curr_offset); 1626 mmapflags = 0; 1627 mapprot = ufi->entry->protection; 1628 UVMHIST_LOG(maphist, 1629 " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%lx, at=%d", 1630 ufi->orig_map->pmap, curr_va, paddr, mapprot); 1631 if (pmap_enter_default(ufi->orig_map->pmap, curr_va, paddr, mapprot, 1632 PMAP_CANFAIL | mapprot | mmapflags) != 0) { 1633 /* 1634 * pmap_enter() didn't have the resource to 1635 * enter this mapping. Unlock everything, 1636 * wait for the pagedaemon to free up some 1637 * pages, and then tell uvm_fault() to start 1638 * the fault again. 1639 * 1640 * XXX Needs some rethinking for the PGO_ALLPAGES 1641 * XXX case. 1642 */ 1643 pmap_update(ufi->orig_map->pmap); /* sync what we have so far */ 1644 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 1645 uobj); 1646 uvm_wait("i915flt"); 1647 return (ERESTART); 1648 } 1649 } 1650 1651 pmap_update(ufi->orig_map->pmap); 1652 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); 1653 return (retval); 1654 } 1655 #else 1656 /** 1657 * i915_gem_fault - fault a page into the GTT 1658 * vma: VMA in question 1659 * vmf: fault info 1660 * 1661 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 1662 * from userspace. The fault handler takes care of binding the object to 1663 * the GTT (if needed), allocating and programming a fence register (again, 1664 * only if needed based on whether the old reg is still valid or the object 1665 * is tiled) and inserting a new PTE into the faulting process. 1666 * 1667 * Note that the faulting process may involve evicting existing objects 1668 * from the GTT and/or fence registers to make room. 
So performance may 1669 * suffer if the GTT working set is large or there are few fence registers 1670 * left. 1671 */ 1672 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1673 { 1674 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); 1675 struct drm_device *dev = obj->base.dev; 1676 drm_i915_private_t *dev_priv = dev->dev_private; 1677 pgoff_t page_offset; 1678 unsigned long pfn; 1679 int ret = 0; 1680 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1681 1682 /* We don't use vmf->pgoff since that has the fake offset */ 1683 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 1684 PAGE_SHIFT; 1685 1686 ret = i915_mutex_lock_interruptible(dev); 1687 if (ret) 1688 goto out; 1689 1690 trace_i915_gem_object_fault(obj, page_offset, true, write); 1691 1692 /* Now bind it into the GTT if needed */ 1693 ret = i915_gem_object_pin(obj, 0, true, false); 1694 if (ret) 1695 goto unlock; 1696 1697 ret = i915_gem_object_set_to_gtt_domain(obj, write); 1698 if (ret) 1699 goto unpin; 1700 1701 ret = i915_gem_object_get_fence(obj); 1702 if (ret) 1703 goto unpin; 1704 1705 obj->fault_mappable = true; 1706 1707 pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) + 1708 page_offset; 1709 1710 /* Finally, remap it using the new GTT offset */ 1711 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); 1712 unpin: 1713 i915_gem_object_unpin(obj); 1714 unlock: 1715 mutex_unlock(&dev->struct_mutex); 1716 out: 1717 switch (ret) { 1718 case -EIO: 1719 /* If this -EIO is due to a gpu hang, give the reset code a 1720 * chance to clean up the mess. Otherwise return the proper 1721 * SIGBUS. */ 1722 if (!atomic_read(&dev_priv->mm.wedged)) 1723 return VM_FAULT_SIGBUS; 1724 case -EAGAIN: 1725 /* Give the error handler a chance to run and move the 1726 * objects off the GPU active list. Next time we service the 1727 * fault, we should be able to transition the page into the 1728 * GTT without touching the GPU (and so avoid further 1729 * EIO/EGAIN). If the GPU is wedged, then there is no issue 1730 * with coherency, just lost writes. 1731 */ 1732 set_need_resched(); 1733 case 0: 1734 case -ERESTARTSYS: 1735 case -EINTR: 1736 case -EBUSY: 1737 /* 1738 * EBUSY is ok: this just means that another thread 1739 * already did the job. 1740 */ 1741 return VM_FAULT_NOPAGE; 1742 case -ENOMEM: 1743 return VM_FAULT_OOM; 1744 case -ENOSPC: 1745 return VM_FAULT_SIGBUS; 1746 default: 1747 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 1748 return VM_FAULT_SIGBUS; 1749 } 1750 } 1751 #endif 1752 1753 /** 1754 * i915_gem_release_mmap - remove physical page mappings 1755 * @obj: obj in question 1756 * 1757 * Preserve the reservation of the mmapping with the DRM core code, but 1758 * relinquish ownership of the pages back to the system. 1759 * 1760 * It is vital that we remove the page mapping if we have mapped a tiled 1761 * object through the GTT and then lose the fence register due to 1762 * resource pressure. Similarly if the object has been moved out of the 1763 * aperture, than pages mapped into userspace must be revoked. Removing the 1764 * mapping will then trigger a page fault on the next user access, allowing 1765 * fixup by i915_gem_fault(). 
1766 */ 1767 void 1768 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1769 { 1770 if (!obj->fault_mappable) 1771 return; 1772 1773 #ifdef __NetBSD__ /* XXX gem gtt fault */ 1774 { 1775 struct vm_page *page; 1776 1777 mutex_enter(obj->base.gemo_shm_uao->vmobjlock); 1778 KASSERT(obj->pages != NULL); 1779 /* Force a fresh fault for each page. */ 1780 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) 1781 pmap_page_protect(page, VM_PROT_NONE); 1782 mutex_exit(obj->base.gemo_shm_uao->vmobjlock); 1783 } 1784 #else 1785 if (obj->base.dev->dev_mapping) 1786 unmap_mapping_range(obj->base.dev->dev_mapping, 1787 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT, 1788 obj->base.size, 1); 1789 #endif 1790 1791 obj->fault_mappable = false; 1792 } 1793 1794 static uint32_t 1795 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1796 { 1797 uint32_t gtt_size; 1798 1799 if (INTEL_INFO(dev)->gen >= 4 || 1800 tiling_mode == I915_TILING_NONE) 1801 return size; 1802 1803 /* Previous chips need a power-of-two fence region when tiling */ 1804 if (INTEL_INFO(dev)->gen == 3) 1805 gtt_size = 1024*1024; 1806 else 1807 gtt_size = 512*1024; 1808 1809 while (gtt_size < size) 1810 gtt_size <<= 1; 1811 1812 return gtt_size; 1813 } 1814 1815 /** 1816 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1817 * @obj: object to check 1818 * 1819 * Return the required GTT alignment for an object, taking into account 1820 * potential fence register mapping. 1821 */ 1822 static uint32_t 1823 i915_gem_get_gtt_alignment(struct drm_device *dev, 1824 uint32_t size, 1825 int tiling_mode) 1826 { 1827 /* 1828 * Minimum alignment is 4k (GTT page size), but might be greater 1829 * if a fence register is needed for the object. 1830 */ 1831 if (INTEL_INFO(dev)->gen >= 4 || 1832 tiling_mode == I915_TILING_NONE) 1833 return 4096; 1834 1835 /* 1836 * Previous chips need to be aligned to the size of the smallest 1837 * fence register that can contain the object. 1838 */ 1839 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1840 } 1841 1842 /** 1843 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1844 * unfenced object 1845 * @dev: the device 1846 * @size: size of the object 1847 * @tiling_mode: tiling mode of the object 1848 * 1849 * Return the required GTT alignment for an object, only taking into account 1850 * unfenced tiled surface requirements. 1851 */ 1852 uint32_t 1853 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, 1854 uint32_t size, 1855 int tiling_mode) 1856 { 1857 /* 1858 * Minimum alignment is 4k (GTT page size) for sane hw. 1859 */ 1860 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1861 tiling_mode == I915_TILING_NONE) 1862 return 4096; 1863 1864 /* Previous hardware however needs to be aligned to a power-of-two 1865 * tile height. The simplest method for determining this is to reuse 1866 * the power-of-tile object size. 1867 */ 1868 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1869 } 1870 1871 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 1872 { 1873 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 1874 int ret; 1875 1876 if (obj->base.map_list.map) 1877 return 0; 1878 1879 dev_priv->mm.shrinker_no_lock_stealing = true; 1880 1881 ret = drm_gem_create_mmap_offset(&obj->base); 1882 if (ret != -ENOSPC) 1883 goto out; 1884 1885 /* Badly fragmented mmap space? The only way we can recover 1886 * space is by destroying unwanted objects. 
We can't randomly release 1887 * mmap_offsets as userspace expects them to be persistent for the 1888 * lifetime of the objects. The closest we can is to release the 1889 * offsets on purgeable objects by truncating it and marking it purged, 1890 * which prevents userspace from ever using that object again. 1891 */ 1892 i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); 1893 ret = drm_gem_create_mmap_offset(&obj->base); 1894 if (ret != -ENOSPC) 1895 goto out; 1896 1897 i915_gem_shrink_all(dev_priv); 1898 ret = drm_gem_create_mmap_offset(&obj->base); 1899 out: 1900 dev_priv->mm.shrinker_no_lock_stealing = false; 1901 1902 return ret; 1903 } 1904 1905 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 1906 { 1907 if (!obj->base.map_list.map) 1908 return; 1909 1910 drm_gem_free_mmap_offset(&obj->base); 1911 } 1912 1913 int 1914 i915_gem_mmap_gtt(struct drm_file *file, 1915 struct drm_device *dev, 1916 uint32_t handle, 1917 uint64_t *offset) 1918 { 1919 struct drm_i915_private *dev_priv = dev->dev_private; 1920 struct drm_i915_gem_object *obj; 1921 int ret; 1922 1923 ret = i915_mutex_lock_interruptible(dev); 1924 if (ret) 1925 return ret; 1926 1927 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1928 if (&obj->base == NULL) { 1929 ret = -ENOENT; 1930 goto unlock; 1931 } 1932 1933 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1934 ret = -E2BIG; 1935 goto out; 1936 } 1937 1938 if (obj->madv != I915_MADV_WILLNEED) { 1939 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1940 ret = -EINVAL; 1941 goto out; 1942 } 1943 1944 ret = i915_gem_object_create_mmap_offset(obj); 1945 if (ret) 1946 goto out; 1947 1948 *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT; 1949 1950 out: 1951 drm_gem_object_unreference(&obj->base); 1952 unlock: 1953 mutex_unlock(&dev->struct_mutex); 1954 return ret; 1955 } 1956 1957 /** 1958 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1959 * @dev: DRM device 1960 * @data: GTT mapping ioctl data 1961 * @file: GEM object info 1962 * 1963 * Simply returns the fake offset to userspace so it can mmap it. 1964 * The mmap call will end up in drm_gem_mmap(), which will set things 1965 * up so we can get faults in the handler above. 1966 * 1967 * The fault handler will take care of binding the object into the GTT 1968 * (since it may have been evicted to make room for something), allocating 1969 * a fence register, and mapping the appropriate aperture address into 1970 * userspace. 1971 */ 1972 int 1973 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1974 struct drm_file *file) 1975 { 1976 struct drm_i915_gem_mmap_gtt *args = data; 1977 1978 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1979 } 1980 1981 /* Immediately discard the backing storage */ 1982 static void 1983 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1984 { 1985 #ifndef __NetBSD__ 1986 struct inode *inode; 1987 #endif 1988 1989 i915_gem_object_free_mmap_offset(obj); 1990 1991 #ifdef __NetBSD__ 1992 { 1993 struct uvm_object *const uobj = obj->base.gemo_shm_uao; 1994 1995 if (uobj != NULL) { 1996 /* XXX Calling pgo_put like this is bogus. */ 1997 mutex_enter(uobj->vmobjlock); 1998 (*uobj->pgops->pgo_put)(uobj, 0, obj->base.size, 1999 (PGO_ALLPAGES | PGO_FREE)); 2000 } 2001 } 2002 #else 2003 if (obj->base.filp == NULL) 2004 return; 2005 2006 /* Our goal here is to return as much of the memory as 2007 * is possible back to the system as we are called from OOM. 
2008 * To do this we must instruct the shmfs to drop all of its 2009 * backing pages, *now*. 2010 */ 2011 inode = obj->base.filp->f_path.dentry->d_inode; 2012 shmem_truncate_range(inode, 0, (loff_t)-1); 2013 #endif 2014 2015 obj->madv = __I915_MADV_PURGED; 2016 } 2017 2018 static inline int 2019 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 2020 { 2021 return obj->madv == I915_MADV_DONTNEED; 2022 } 2023 2024 #ifdef __NetBSD__ 2025 static void 2026 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2027 { 2028 struct drm_device *const dev = obj->base.dev; 2029 int ret; 2030 2031 /* XXX Cargo-culted from the Linux code. */ 2032 BUG_ON(obj->madv == __I915_MADV_PURGED); 2033 2034 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2035 if (ret) { 2036 WARN_ON(ret != -EIO); 2037 i915_gem_clflush_object(obj); 2038 obj->base.read_domains = obj->base.write_domain = 2039 I915_GEM_DOMAIN_CPU; 2040 } 2041 2042 if (i915_gem_object_needs_bit17_swizzle(obj)) 2043 i915_gem_object_save_bit_17_swizzle(obj); 2044 2045 /* XXX Maintain dirty flag? */ 2046 2047 bus_dmamap_destroy(dev->dmat, obj->igo_dmamap); 2048 bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2049 obj->base.size, obj->pages, obj->igo_nsegs); 2050 2051 kfree(obj->pages); 2052 } 2053 #else 2054 static void 2055 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 2056 { 2057 int page_count = obj->base.size / PAGE_SIZE; 2058 struct scatterlist *sg; 2059 int ret, i; 2060 2061 BUG_ON(obj->madv == __I915_MADV_PURGED); 2062 2063 ret = i915_gem_object_set_to_cpu_domain(obj, true); 2064 if (ret) { 2065 /* In the event of a disaster, abandon all caches and 2066 * hope for the best. 2067 */ 2068 WARN_ON(ret != -EIO); 2069 i915_gem_clflush_object(obj); 2070 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2071 } 2072 2073 if (i915_gem_object_needs_bit17_swizzle(obj)) 2074 i915_gem_object_save_bit_17_swizzle(obj); 2075 2076 if (obj->madv == I915_MADV_DONTNEED) 2077 obj->dirty = 0; 2078 2079 for_each_sg(obj->pages->sgl, sg, page_count, i) { 2080 struct page *page = sg_page(sg); 2081 2082 if (obj->dirty) 2083 set_page_dirty(page); 2084 2085 if (obj->madv == I915_MADV_WILLNEED) 2086 mark_page_accessed(page); 2087 2088 page_cache_release(page); 2089 } 2090 obj->dirty = 0; 2091 2092 sg_free_table(obj->pages); 2093 kfree(obj->pages); 2094 } 2095 #endif 2096 2097 static int 2098 i915_gem_object_put_pages(struct drm_i915_gem_object *obj) 2099 { 2100 const struct drm_i915_gem_object_ops *ops = obj->ops; 2101 2102 if (obj->pages == NULL) 2103 return 0; 2104 2105 BUG_ON(obj->gtt_space); 2106 2107 if (obj->pages_pin_count) 2108 return -EBUSY; 2109 2110 /* ->put_pages might need to allocate memory for the bit17 swizzle 2111 * array, hence protect them from being reaped by removing them from gtt 2112 * lists early. 
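 * Once the pages have been released, purgeable objects are truncated below so
 * that their backing storage is returned to the system as well.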
*/ 2113 list_del(&obj->gtt_list); 2114 2115 ops->put_pages(obj); 2116 obj->pages = NULL; 2117 2118 if (i915_gem_object_is_purgeable(obj)) 2119 i915_gem_object_truncate(obj); 2120 2121 return 0; 2122 } 2123 2124 static long 2125 __i915_gem_shrink(struct drm_i915_private *dev_priv, long target, 2126 bool purgeable_only) 2127 { 2128 struct drm_i915_gem_object *obj, *next; 2129 long count = 0; 2130 2131 list_for_each_entry_safe(obj, next, 2132 &dev_priv->mm.unbound_list, 2133 gtt_list) { 2134 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2135 i915_gem_object_put_pages(obj) == 0) { 2136 count += obj->base.size >> PAGE_SHIFT; 2137 if (count >= target) 2138 return count; 2139 } 2140 } 2141 2142 list_for_each_entry_safe(obj, next, 2143 &dev_priv->mm.inactive_list, 2144 mm_list) { 2145 if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) && 2146 i915_gem_object_unbind(obj) == 0 && 2147 i915_gem_object_put_pages(obj) == 0) { 2148 count += obj->base.size >> PAGE_SHIFT; 2149 if (count >= target) 2150 return count; 2151 } 2152 } 2153 2154 return count; 2155 } 2156 2157 static long 2158 i915_gem_purge(struct drm_i915_private *dev_priv, long target) 2159 { 2160 return __i915_gem_shrink(dev_priv, target, true); 2161 } 2162 2163 static void 2164 i915_gem_shrink_all(struct drm_i915_private *dev_priv) 2165 { 2166 struct drm_i915_gem_object *obj, *next; 2167 2168 i915_gem_evict_everything(dev_priv->dev); 2169 2170 list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list) 2171 i915_gem_object_put_pages(obj); 2172 } 2173 2174 #ifdef __NetBSD__ 2175 static int 2176 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2177 { 2178 struct drm_device *const dev = obj->base.dev; 2179 struct vm_page *page; 2180 int error; 2181 2182 /* XXX Cargo-culted from the Linux code. */ 2183 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2184 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2185 2186 KASSERT(obj->pages == NULL); 2187 TAILQ_INIT(&obj->igo_pageq); 2188 obj->pages = kcalloc((obj->base.size / PAGE_SIZE), 2189 sizeof(obj->pages[0]), GFP_KERNEL); 2190 if (obj->pages == NULL) { 2191 error = -ENOMEM; 2192 goto fail0; 2193 } 2194 2195 /* XXX errno NetBSD->Linux */ 2196 error = -bus_dmamem_wire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 2197 0, obj->base.size, &obj->igo_pageq, PAGE_SIZE, 0, obj->pages, 2198 (obj->base.size / PAGE_SIZE), &obj->igo_nsegs, BUS_DMA_NOWAIT); 2199 if (error) 2200 /* XXX Try i915_gem_purge, i915_gem_shrink_all. */ 2201 goto fail1; 2202 KASSERT(0 < obj->igo_nsegs); 2203 KASSERT(obj->igo_nsegs <= (obj->base.size / PAGE_SIZE)); 2204 2205 /* 2206 * Check that the paddrs will fit in 40 bits. 2207 * 2208 * XXX This is wrong; we ought to pass this constraint to 2209 * bus_dmamem_wire_uvm_object instead. 2210 */ 2211 TAILQ_FOREACH(page, &obj->igo_pageq, pageq.queue) { 2212 if (VM_PAGE_TO_PHYS(page) & ~0xffffffffffULL) { 2213 DRM_ERROR("GEM physical address exceeds 40 bits" 2214 ": %"PRIxMAX"\n", 2215 (uintmax_t)VM_PAGE_TO_PHYS(page)); error = -EIO; /* XXX Better errno?  Without this we'd return 0 on failure. */ 2216 goto fail2; 2217 } 2218 } 2219 2220 /* XXX errno NetBSD->Linux */ 2221 error = -bus_dmamap_create(dev->dmat, obj->base.size, obj->igo_nsegs, 2222 PAGE_SIZE, 0, BUS_DMA_NOWAIT, &obj->igo_dmamap); 2223 if (error) 2224 goto fail2; 2225 2226 /* XXX Cargo-culted from the Linux code. */ 2227 if (i915_gem_object_needs_bit17_swizzle(obj)) 2228 i915_gem_object_do_bit_17_swizzle(obj); 2229 2230 /* Success!
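 * The pages are now wired into the backing UVM object and a DMA map has
 * been created for them.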
*/ 2231 return 0; 2232 2233 fail2: bus_dmamem_unwire_uvm_object(dev->dmat, obj->base.gemo_shm_uao, 0, 2234 obj->base.size, obj->pages, (obj->base.size / PAGE_SIZE)); 2235 fail1: kfree(obj->pages); 2236 obj->pages = NULL; 2237 fail0: return error; 2238 } 2239 #else 2240 static int 2241 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2242 { 2243 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2244 int page_count, i; 2245 struct address_space *mapping; 2246 struct sg_table *st; 2247 struct scatterlist *sg; 2248 struct page *page; 2249 gfp_t gfp; 2250 2251 /* Assert that the object is not currently in any GPU domain. As it 2252 * wasn't in the GTT, there shouldn't be any way it could have been in 2253 * a GPU cache 2254 */ 2255 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); 2256 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); 2257 2258 st = kmalloc(sizeof(*st), GFP_KERNEL); 2259 if (st == NULL) 2260 return -ENOMEM; 2261 2262 page_count = obj->base.size / PAGE_SIZE; 2263 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2264 sg_free_table(st); 2265 kfree(st); 2266 return -ENOMEM; 2267 } 2268 2269 /* Get the list of pages out of our struct file. They'll be pinned 2270 * at this point until we release them. 2271 * 2272 * Fail silently without starting the shrinker 2273 */ 2274 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 2275 gfp = mapping_gfp_mask(mapping); 2276 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2277 gfp &= ~(__GFP_IO | __GFP_WAIT); 2278 for_each_sg(st->sgl, sg, page_count, i) { 2279 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2280 if (IS_ERR(page)) { 2281 i915_gem_purge(dev_priv, page_count); 2282 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2283 } 2284 if (IS_ERR(page)) { 2285 /* We've tried hard to allocate the memory by reaping 2286 * our own buffer, now let the real VM do its job and 2287 * go down in flames if truly OOM. 2288 */ 2289 gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD); 2290 gfp |= __GFP_IO | __GFP_WAIT; 2291 2292 i915_gem_shrink_all(dev_priv); 2293 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2294 if (IS_ERR(page)) 2295 goto err_pages; 2296 2297 gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD; 2298 gfp &= ~(__GFP_IO | __GFP_WAIT); 2299 } 2300 2301 sg_set_page(sg, page, PAGE_SIZE, 0); 2302 } 2303 2304 obj->pages = st; 2305 2306 if (i915_gem_object_needs_bit17_swizzle(obj)) 2307 i915_gem_object_do_bit_17_swizzle(obj); 2308 2309 return 0; 2310 2311 err_pages: 2312 for_each_sg(st->sgl, sg, i, page_count) 2313 page_cache_release(sg_page(sg)); 2314 sg_free_table(st); 2315 kfree(st); 2316 return PTR_ERR(page); 2317 } 2318 #endif 2319 2320 /* Ensure that the associated pages are gathered from the backing storage 2321 * and pinned into our object. i915_gem_object_get_pages() may be called 2322 * multiple times before they are released by a single call to 2323 * i915_gem_object_put_pages() - once the pages are no longer referenced 2324 * either as a result of memory pressure (reaping pages under the shrinker) 2325 * or as the object is itself released. 
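 * Newly acquired pages are placed on the unbound list until the object is
 * bound into the GTT.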
2326 */ 2327 int 2328 i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 2329 { 2330 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2331 const struct drm_i915_gem_object_ops *ops = obj->ops; 2332 int ret; 2333 2334 if (obj->pages) 2335 return 0; 2336 2337 BUG_ON(obj->pages_pin_count); 2338 2339 ret = ops->get_pages(obj); 2340 if (ret) 2341 return ret; 2342 2343 list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2344 return 0; 2345 } 2346 2347 void 2348 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 2349 struct intel_ring_buffer *ring) 2350 { 2351 struct drm_device *dev = obj->base.dev; 2352 struct drm_i915_private *dev_priv = dev->dev_private; 2353 u32 seqno = intel_ring_get_seqno(ring); 2354 2355 BUG_ON(ring == NULL); 2356 obj->ring = ring; 2357 2358 /* Add a reference if we're newly entering the active list. */ 2359 if (!obj->active) { 2360 drm_gem_object_reference(&obj->base); 2361 obj->active = 1; 2362 } 2363 2364 /* Move from whatever list we were on to the tail of execution. */ 2365 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 2366 list_move_tail(&obj->ring_list, &ring->active_list); 2367 2368 obj->last_read_seqno = seqno; 2369 2370 if (obj->fenced_gpu_access) { 2371 obj->last_fenced_seqno = seqno; 2372 2373 /* Bump MRU to take account of the delayed flush */ 2374 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2375 struct drm_i915_fence_reg *reg; 2376 2377 reg = &dev_priv->fence_regs[obj->fence_reg]; 2378 list_move_tail(®->lru_list, 2379 &dev_priv->mm.fence_list); 2380 } 2381 } 2382 } 2383 2384 static void 2385 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 2386 { 2387 struct drm_device *dev = obj->base.dev; 2388 struct drm_i915_private *dev_priv = dev->dev_private; 2389 2390 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 2391 BUG_ON(!obj->active); 2392 2393 if (obj->pin_count) /* are we a framebuffer? */ 2394 intel_mark_fb_idle(obj); 2395 2396 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2397 2398 list_del_init(&obj->ring_list); 2399 obj->ring = NULL; 2400 2401 obj->last_read_seqno = 0; 2402 obj->last_write_seqno = 0; 2403 obj->base.write_domain = 0; 2404 2405 obj->last_fenced_seqno = 0; 2406 obj->fenced_gpu_access = false; 2407 2408 obj->active = 0; 2409 drm_gem_object_unreference(&obj->base); 2410 2411 WARN_ON(i915_verify_lists(dev)); 2412 } 2413 2414 static int 2415 i915_gem_handle_seqno_wrap(struct drm_device *dev) 2416 { 2417 struct drm_i915_private *dev_priv = dev->dev_private; 2418 struct intel_ring_buffer *ring; 2419 int ret, i, j; 2420 2421 /* The hardware uses various monotonic 32-bit counters, if we 2422 * detect that they will wraparound we need to idle the GPU 2423 * and reset those counters. 
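 * Idling the GPU first guarantees that no ring still depends on a stale
 * semaphore value when the per-ring sync_seqno bookkeeping is cleared below.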
2424 */ 2425 ret = 0; 2426 for_each_ring(ring, dev_priv, i) { 2427 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2428 ret |= ring->sync_seqno[j] != 0; 2429 } 2430 if (ret == 0) 2431 return ret; 2432 2433 ret = i915_gpu_idle(dev); 2434 if (ret) 2435 return ret; 2436 2437 i915_gem_retire_requests(dev); 2438 for_each_ring(ring, dev_priv, i) { 2439 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 2440 ring->sync_seqno[j] = 0; 2441 } 2442 2443 return 0; 2444 } 2445 2446 int 2447 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 2448 { 2449 struct drm_i915_private *dev_priv = dev->dev_private; 2450 2451 /* reserve 0 for non-seqno */ 2452 if (dev_priv->next_seqno == 0) { 2453 int ret = i915_gem_handle_seqno_wrap(dev); 2454 if (ret) 2455 return ret; 2456 2457 dev_priv->next_seqno = 1; 2458 } 2459 2460 *seqno = dev_priv->next_seqno++; 2461 return 0; 2462 } 2463 2464 int 2465 i915_add_request(struct intel_ring_buffer *ring, 2466 struct drm_file *file, 2467 u32 *out_seqno) 2468 { 2469 drm_i915_private_t *dev_priv = ring->dev->dev_private; 2470 struct drm_i915_gem_request *request; 2471 u32 request_ring_position; 2472 int was_empty; 2473 int ret; 2474 2475 /* 2476 * Emit any outstanding flushes - execbuf can fail to emit the flush 2477 * after having emitted the batchbuffer command. Hence we need to fix 2478 * things up similar to emitting the lazy request. The difference here 2479 * is that the flush _must_ happen before the next request, no matter 2480 * what. 2481 */ 2482 ret = intel_ring_flush_all_caches(ring); 2483 if (ret) 2484 return ret; 2485 2486 request = kmalloc(sizeof(*request), GFP_KERNEL); 2487 if (request == NULL) 2488 return -ENOMEM; 2489 2490 2491 /* Record the position of the start of the request so that 2492 * should we detect the updated seqno part-way through the 2493 * GPU processing the request, we never over-estimate the 2494 * position of the head. 
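 * (request->tail is later consumed by request retirement to advance
 * ring->last_retired_head.)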
2495 */ 2496 request_ring_position = intel_ring_get_tail(ring); 2497 2498 ret = ring->add_request(ring); 2499 if (ret) { 2500 kfree(request); 2501 return ret; 2502 } 2503 2504 request->seqno = intel_ring_get_seqno(ring); 2505 request->ring = ring; 2506 request->tail = request_ring_position; 2507 request->emitted_jiffies = jiffies; 2508 was_empty = list_empty(&ring->request_list); 2509 list_add_tail(&request->list, &ring->request_list); 2510 request->file_priv = NULL; 2511 2512 if (file) { 2513 struct drm_i915_file_private *file_priv = file->driver_priv; 2514 2515 spin_lock(&file_priv->mm.lock); 2516 request->file_priv = file_priv; 2517 list_add_tail(&request->client_list, 2518 &file_priv->mm.request_list); 2519 spin_unlock(&file_priv->mm.lock); 2520 } 2521 2522 trace_i915_gem_request_add(ring, request->seqno); 2523 ring->outstanding_lazy_request = 0; 2524 2525 if (!dev_priv->mm.suspended) { 2526 if (i915_enable_hangcheck) { 2527 mod_timer(&dev_priv->hangcheck_timer, 2528 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); 2529 } 2530 if (was_empty) { 2531 queue_delayed_work(dev_priv->wq, 2532 &dev_priv->mm.retire_work, 2533 round_jiffies_up_relative(HZ)); 2534 intel_mark_busy(dev_priv->dev); 2535 } 2536 } 2537 2538 if (out_seqno) 2539 *out_seqno = request->seqno; 2540 return 0; 2541 } 2542 2543 static inline void 2544 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 2545 { 2546 struct drm_i915_file_private *file_priv = request->file_priv; 2547 2548 if (!file_priv) 2549 return; 2550 2551 spin_lock(&file_priv->mm.lock); 2552 if (request->file_priv) { 2553 list_del(&request->client_list); 2554 request->file_priv = NULL; 2555 } 2556 spin_unlock(&file_priv->mm.lock); 2557 } 2558 2559 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 2560 struct intel_ring_buffer *ring) 2561 { 2562 while (!list_empty(&ring->request_list)) { 2563 struct drm_i915_gem_request *request; 2564 2565 request = list_first_entry(&ring->request_list, 2566 struct drm_i915_gem_request, 2567 list); 2568 2569 list_del(&request->list); 2570 i915_gem_request_remove_from_client(request); 2571 kfree(request); 2572 } 2573 2574 while (!list_empty(&ring->active_list)) { 2575 struct drm_i915_gem_object *obj; 2576 2577 obj = list_first_entry(&ring->active_list, 2578 struct drm_i915_gem_object, 2579 ring_list); 2580 2581 i915_gem_object_move_to_inactive(obj); 2582 } 2583 } 2584 2585 static void i915_gem_reset_fences(struct drm_device *dev) 2586 { 2587 struct drm_i915_private *dev_priv = dev->dev_private; 2588 int i; 2589 2590 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2591 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2592 2593 i915_gem_write_fence(dev, i, NULL); 2594 2595 if (reg->obj) 2596 i915_gem_object_fence_lost(reg->obj); 2597 2598 reg->pin_count = 0; 2599 reg->obj = NULL; 2600 INIT_LIST_HEAD(®->lru_list); 2601 } 2602 2603 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 2604 } 2605 2606 void i915_gem_reset(struct drm_device *dev) 2607 { 2608 struct drm_i915_private *dev_priv = dev->dev_private; 2609 struct drm_i915_gem_object *obj; 2610 struct intel_ring_buffer *ring; 2611 int i; 2612 2613 for_each_ring(ring, dev_priv, i) 2614 i915_gem_reset_ring_lists(dev_priv, ring); 2615 2616 /* Move everything out of the GPU domains to ensure we do any 2617 * necessary invalidation upon reuse. 
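 * Only the inactive list needs to be walked here: the per-ring reset above
 * has already moved every active object onto it.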
2618 */ 2619 list_for_each_entry(obj, 2620 &dev_priv->mm.inactive_list, 2621 mm_list) 2622 { 2623 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2624 } 2625 2626 /* The fence registers are invalidated so clear them out */ 2627 i915_gem_reset_fences(dev); 2628 } 2629 2630 /** 2631 * This function clears the request list as sequence numbers are passed. 2632 */ 2633 void 2634 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2635 { 2636 uint32_t seqno; 2637 2638 if (list_empty(&ring->request_list)) 2639 return; 2640 2641 WARN_ON(i915_verify_lists(ring->dev)); 2642 2643 seqno = ring->get_seqno(ring, true); 2644 2645 while (!list_empty(&ring->request_list)) { 2646 struct drm_i915_gem_request *request; 2647 2648 request = list_first_entry(&ring->request_list, 2649 struct drm_i915_gem_request, 2650 list); 2651 2652 if (!i915_seqno_passed(seqno, request->seqno)) 2653 break; 2654 2655 trace_i915_gem_request_retire(ring, request->seqno); 2656 /* We know the GPU must have read the request to have 2657 * sent us the seqno + interrupt, so use the position 2658 * of tail of the request to update the last known position 2659 * of the GPU head. 2660 */ 2661 ring->last_retired_head = request->tail; 2662 2663 list_del(&request->list); 2664 i915_gem_request_remove_from_client(request); 2665 kfree(request); 2666 } 2667 2668 /* Move any buffers on the active list that are no longer referenced 2669 * by the ringbuffer to the flushing/inactive lists as appropriate. 2670 */ 2671 while (!list_empty(&ring->active_list)) { 2672 struct drm_i915_gem_object *obj; 2673 2674 obj = list_first_entry(&ring->active_list, 2675 struct drm_i915_gem_object, 2676 ring_list); 2677 2678 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2679 break; 2680 2681 i915_gem_object_move_to_inactive(obj); 2682 } 2683 2684 if (unlikely(ring->trace_irq_seqno && 2685 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2686 ring->irq_put(ring); 2687 ring->trace_irq_seqno = 0; 2688 } 2689 2690 WARN_ON(i915_verify_lists(ring->dev)); 2691 } 2692 2693 void 2694 i915_gem_retire_requests(struct drm_device *dev) 2695 { 2696 drm_i915_private_t *dev_priv = dev->dev_private; 2697 struct intel_ring_buffer *ring; 2698 int i; 2699 2700 for_each_ring(ring, dev_priv, i) 2701 i915_gem_retire_requests_ring(ring); 2702 } 2703 2704 static void 2705 i915_gem_retire_work_handler(struct work_struct *work) 2706 { 2707 drm_i915_private_t *dev_priv; 2708 struct drm_device *dev; 2709 struct intel_ring_buffer *ring; 2710 bool idle; 2711 int i; 2712 2713 dev_priv = container_of(work, drm_i915_private_t, 2714 mm.retire_work.work); 2715 dev = dev_priv->dev; 2716 2717 /* Come back later if the device is busy... */ 2718 if (!mutex_trylock(&dev->struct_mutex)) { 2719 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2720 round_jiffies_up_relative(HZ)); 2721 return; 2722 } 2723 2724 i915_gem_retire_requests(dev); 2725 2726 /* Send a periodic flush down the ring so we don't hold onto GEM 2727 * objects indefinitely. 
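 * Emitting a request for any ring with dirty caches gives those objects a
 * seqno to retire against on a later pass of this handler.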
2728 */ 2729 idle = true; 2730 for_each_ring(ring, dev_priv, i) { 2731 if (ring->gpu_caches_dirty) 2732 i915_add_request(ring, NULL, NULL); 2733 2734 idle &= list_empty(&ring->request_list); 2735 } 2736 2737 if (!dev_priv->mm.suspended && !idle) 2738 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 2739 round_jiffies_up_relative(HZ)); 2740 if (idle) 2741 intel_mark_idle(dev); 2742 2743 mutex_unlock(&dev->struct_mutex); 2744 } 2745 2746 /** 2747 * Ensures that an object will eventually get non-busy by flushing any required 2748 * write domains, emitting any outstanding lazy request and retiring any 2749 * completed requests. 2750 */ 2751 static int 2752 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 2753 { 2754 int ret; 2755 2756 if (obj->active) { 2757 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 2758 if (ret) 2759 return ret; 2760 2761 i915_gem_retire_requests_ring(obj->ring); 2762 } 2763 2764 return 0; 2765 } 2766 2767 /** 2768 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 2769 * @DRM_IOCTL_ARGS: standard ioctl arguments 2770 * 2771 * Returns 0 if successful, else an error is returned with the remaining time in 2772 * the timeout parameter. 2773 * -ETIME: object is still busy after timeout 2774 * -ERESTARTSYS: signal interrupted the wait 2775 * -ENOENT: object doesn't exist 2776 * Also possible, but rare: 2777 * -EAGAIN: GPU wedged 2778 * -ENOMEM: damn 2779 * -ENODEV: Internal IRQ fail 2780 * -E?: The add request failed 2781 * 2782 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 2783 * non-zero timeout parameter the wait ioctl will wait for the given number of 2784 * nanoseconds on an object becoming unbusy. Since the wait itself does so 2785 * without holding struct_mutex the object may become re-busied before this 2786 * function completes. A similar but shorter race condition exists in the busy 2787 * ioctl. 2788 */ 2789 int 2790 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 2791 { 2792 struct drm_i915_gem_wait *args = data; 2793 struct drm_i915_gem_object *obj; 2794 struct intel_ring_buffer *ring = NULL; 2795 struct timespec timeout_stack, *timeout = NULL; 2796 u32 seqno = 0; 2797 int ret = 0; 2798 2799 if (args->timeout_ns >= 0) { 2800 timeout_stack = ns_to_timespec(args->timeout_ns); 2801 timeout = &timeout_stack; 2802 } 2803 2804 ret = i915_mutex_lock_interruptible(dev); 2805 if (ret) 2806 return ret; 2807 2808 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 2809 if (&obj->base == NULL) { 2810 mutex_unlock(&dev->struct_mutex); 2811 return -ENOENT; 2812 } 2813 2814 /* Need to make sure the object gets inactive eventually.
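 * (i915_gem_object_flush_active emits any outstanding lazy request and
 * retires whatever has already completed.)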
*/ 2815 ret = i915_gem_object_flush_active(obj); 2816 if (ret) 2817 goto out; 2818 2819 if (obj->active) { 2820 seqno = obj->last_read_seqno; 2821 ring = obj->ring; 2822 } 2823 2824 if (seqno == 0) 2825 goto out; 2826 2827 /* Do this after OLR check to make sure we make forward progress polling 2828 * on this IOCTL with a 0 timeout (like busy ioctl) 2829 */ 2830 if (!args->timeout_ns) { 2831 ret = -ETIME; 2832 goto out; 2833 } 2834 2835 drm_gem_object_unreference(&obj->base); 2836 mutex_unlock(&dev->struct_mutex); 2837 2838 ret = __wait_seqno(ring, seqno, true, timeout); 2839 if (timeout) { 2840 WARN_ON(!timespec_valid(timeout)); 2841 args->timeout_ns = timespec_to_ns(timeout); 2842 } 2843 return ret; 2844 2845 out: 2846 drm_gem_object_unreference(&obj->base); 2847 mutex_unlock(&dev->struct_mutex); 2848 return ret; 2849 } 2850 2851 /** 2852 * i915_gem_object_sync - sync an object to a ring. 2853 * 2854 * @obj: object which may be in use on another ring. 2855 * @to: ring we wish to use the object on. May be NULL. 2856 * 2857 * This code is meant to abstract object synchronization with the GPU. 2858 * Calling with NULL implies synchronizing the object with the CPU 2859 * rather than a particular GPU ring. 2860 * 2861 * Returns 0 if successful, else propagates up the lower layer error. 2862 */ 2863 int 2864 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2865 struct intel_ring_buffer *to) 2866 { 2867 struct intel_ring_buffer *from = obj->ring; 2868 u32 seqno; 2869 int ret, idx; 2870 2871 if (from == NULL || to == from) 2872 return 0; 2873 2874 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2875 return i915_gem_object_wait_rendering(obj, false); 2876 2877 idx = intel_ring_sync_index(from, to); 2878 2879 seqno = obj->last_read_seqno; 2880 if (seqno <= from->sync_seqno[idx]) 2881 return 0; 2882 2883 ret = i915_gem_check_olr(obj->ring, seqno); 2884 if (ret) 2885 return ret; 2886 2887 ret = to->sync_to(to, from, seqno); 2888 if (!ret) 2889 /* We use last_read_seqno because sync_to() 2890 * might have just caused seqno wrap under 2891 * the radar. 2892 */ 2893 from->sync_seqno[idx] = obj->last_read_seqno; 2894 2895 return ret; 2896 } 2897 2898 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2899 { 2900 u32 old_write_domain, old_read_domains; 2901 2902 /* Act a barrier for all accesses through the GTT */ 2903 mb(); 2904 2905 /* Force a pagefault for domain tracking on next user access */ 2906 i915_gem_release_mmap(obj); 2907 2908 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2909 return; 2910 2911 old_read_domains = obj->base.read_domains; 2912 old_write_domain = obj->base.write_domain; 2913 2914 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2915 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2916 2917 trace_i915_gem_object_change_domain(obj, 2918 old_read_domains, 2919 old_write_domain); 2920 } 2921 2922 /** 2923 * Unbinds an object from the GTT aperture. 2924 */ 2925 int 2926 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2927 { 2928 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2929 int ret = 0; 2930 2931 if (obj->gtt_space == NULL) 2932 return 0; 2933 2934 if (obj->pin_count) 2935 return -EBUSY; 2936 2937 BUG_ON(obj->pages == NULL); 2938 2939 ret = i915_gem_object_finish_gpu(obj); 2940 if (ret) 2941 return ret; 2942 /* Continue on if we fail due to EIO, the GPU is hung so we 2943 * should be safe and we need to cleanup or else we might 2944 * cause memory corruption through use-after-free. 
2945 */ 2946 2947 i915_gem_object_finish_gtt(obj); 2948 2949 /* release the fence reg _after_ flushing */ 2950 ret = i915_gem_object_put_fence(obj); 2951 if (ret) 2952 return ret; 2953 2954 trace_i915_gem_object_unbind(obj); 2955 2956 if (obj->has_global_gtt_mapping) 2957 i915_gem_gtt_unbind_object(obj); 2958 if (obj->has_aliasing_ppgtt_mapping) { 2959 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2960 obj->has_aliasing_ppgtt_mapping = 0; 2961 } 2962 i915_gem_gtt_finish_object(obj); 2963 2964 list_del(&obj->mm_list); 2965 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2966 /* Avoid an unnecessary call to unbind on rebind. */ 2967 obj->map_and_fenceable = true; 2968 2969 drm_mm_put_block(obj->gtt_space); 2970 obj->gtt_space = NULL; 2971 obj->gtt_offset = 0; 2972 2973 return 0; 2974 } 2975 2976 int i915_gpu_idle(struct drm_device *dev) 2977 { 2978 drm_i915_private_t *dev_priv = dev->dev_private; 2979 struct intel_ring_buffer *ring; 2980 int ret, i; 2981 2982 /* Flush everything onto the inactive list. */ 2983 for_each_ring(ring, dev_priv, i) { 2984 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2985 if (ret) 2986 return ret; 2987 2988 ret = intel_ring_idle(ring); 2989 if (ret) 2990 return ret; 2991 } 2992 2993 return 0; 2994 } 2995 2996 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, 2997 struct drm_i915_gem_object *obj) 2998 { 2999 drm_i915_private_t *dev_priv = dev->dev_private; 3000 uint64_t val; 3001 3002 if (obj) { 3003 u32 size = obj->gtt_space->size; 3004 3005 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3006 0xfffff000) << 32; 3007 val |= obj->gtt_offset & 0xfffff000; 3008 val |= (uint64_t)((obj->stride / 128) - 1) << 3009 SANDYBRIDGE_FENCE_PITCH_SHIFT; 3010 3011 if (obj->tiling_mode == I915_TILING_Y) 3012 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3013 val |= I965_FENCE_REG_VALID; 3014 } else 3015 val = 0; 3016 3017 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); 3018 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); 3019 } 3020 3021 static void i965_write_fence_reg(struct drm_device *dev, int reg, 3022 struct drm_i915_gem_object *obj) 3023 { 3024 drm_i915_private_t *dev_priv = dev->dev_private; 3025 uint64_t val; 3026 3027 if (obj) { 3028 u32 size = obj->gtt_space->size; 3029 3030 val = (uint64_t)((obj->gtt_offset + size - 4096) & 3031 0xfffff000) << 32; 3032 val |= obj->gtt_offset & 0xfffff000; 3033 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 3034 if (obj->tiling_mode == I915_TILING_Y) 3035 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 3036 val |= I965_FENCE_REG_VALID; 3037 } else 3038 val = 0; 3039 3040 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); 3041 POSTING_READ(FENCE_REG_965_0 + reg * 8); 3042 } 3043 3044 static void i915_write_fence_reg(struct drm_device *dev, int reg, 3045 struct drm_i915_gem_object *obj) 3046 { 3047 drm_i915_private_t *dev_priv = dev->dev_private; 3048 u32 val; 3049 3050 if (obj) { 3051 u32 size = obj->gtt_space->size; 3052 int pitch_val; 3053 int tile_width; 3054 3055 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 3056 (size & -size) != size || 3057 (obj->gtt_offset & (size - 1)), 3058 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 3059 obj->gtt_offset, obj->map_and_fenceable, size); 3060 3061 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 3062 tile_width = 128; 3063 else 3064 tile_width = 512; 3065 3066 /* Note: pitch better be a power of two tile widths */ 3067 pitch_val = obj->stride / tile_width; 3068 pitch_val = ffs(pitch_val) - 1; 3069 3070 val = obj->gtt_offset; 3071 if (obj->tiling_mode == I915_TILING_Y) 3072 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3073 val |= I915_FENCE_SIZE_BITS(size); 3074 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3075 val |= I830_FENCE_REG_VALID; 3076 } else 3077 val = 0; 3078 3079 if (reg < 8) 3080 reg = FENCE_REG_830_0 + reg * 4; 3081 else 3082 reg = FENCE_REG_945_8 + (reg - 8) * 4; 3083 3084 I915_WRITE(reg, val); 3085 POSTING_READ(reg); 3086 } 3087 3088 static void i830_write_fence_reg(struct drm_device *dev, int reg, 3089 struct drm_i915_gem_object *obj) 3090 { 3091 drm_i915_private_t *dev_priv = dev->dev_private; 3092 uint32_t val; 3093 3094 if (obj) { 3095 u32 size = obj->gtt_space->size; 3096 uint32_t pitch_val; 3097 3098 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 3099 (size & -size) != size || 3100 (obj->gtt_offset & (size - 1)), 3101 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 3102 obj->gtt_offset, size); 3103 3104 pitch_val = obj->stride / 128; 3105 pitch_val = ffs(pitch_val) - 1; 3106 3107 val = obj->gtt_offset; 3108 if (obj->tiling_mode == I915_TILING_Y) 3109 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 3110 val |= I830_FENCE_SIZE_BITS(size); 3111 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 3112 val |= I830_FENCE_REG_VALID; 3113 } else 3114 val = 0; 3115 3116 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 3117 POSTING_READ(FENCE_REG_830_0 + reg * 4); 3118 } 3119 3120 static void i915_gem_write_fence(struct drm_device *dev, int reg, 3121 struct drm_i915_gem_object *obj) 3122 { 3123 switch (INTEL_INFO(dev)->gen) { 3124 case 7: 3125 case 6: sandybridge_write_fence_reg(dev, reg, obj); break; 3126 case 5: 3127 case 4: i965_write_fence_reg(dev, reg, obj); break; 3128 case 3: i915_write_fence_reg(dev, reg, obj); break; 3129 case 2: i830_write_fence_reg(dev, reg, obj); break; 3130 default: break; 3131 } 3132 } 3133 3134 static inline int fence_number(struct drm_i915_private *dev_priv, 3135 struct drm_i915_fence_reg *fence) 3136 { 3137 return fence - dev_priv->fence_regs; 3138 } 3139 3140 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 3141 struct drm_i915_fence_reg *fence, 3142 bool enable) 3143 { 3144 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3145 int reg = fence_number(dev_priv, fence); 3146 3147 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 3148 3149 if (enable) { 3150 obj->fence_reg = reg; 3151 fence->obj = obj; 3152 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 3153 } else { 3154 obj->fence_reg = I915_FENCE_REG_NONE; 3155 fence->obj = NULL; 3156 list_del_init(&fence->lru_list); 3157 } 3158 } 3159 3160 static int 3161 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 3162 { 3163 if (obj->last_fenced_seqno) { 3164 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 3165 if (ret) 3166 return ret; 3167 3168 obj->last_fenced_seqno = 0; 3169 } 3170 3171 /* Ensure that all CPU reads are completed before installing a fence 3172 * and all writes before removing the fence. 
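 * The barrier below is only needed while the object is still readable
 * through the GTT.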
3173 */ 3174 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 3175 mb(); 3176 3177 obj->fenced_gpu_access = false; 3178 return 0; 3179 } 3180 3181 int 3182 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 3183 { 3184 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3185 int ret; 3186 3187 ret = i915_gem_object_flush_fence(obj); 3188 if (ret) 3189 return ret; 3190 3191 if (obj->fence_reg == I915_FENCE_REG_NONE) 3192 return 0; 3193 3194 i915_gem_object_update_fence(obj, 3195 &dev_priv->fence_regs[obj->fence_reg], 3196 false); 3197 i915_gem_object_fence_lost(obj); 3198 3199 return 0; 3200 } 3201 3202 static struct drm_i915_fence_reg * 3203 i915_find_fence_reg(struct drm_device *dev) 3204 { 3205 struct drm_i915_private *dev_priv = dev->dev_private; 3206 struct drm_i915_fence_reg *reg, *avail; 3207 int i; 3208 3209 /* First try to find a free reg */ 3210 avail = NULL; 3211 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 3212 reg = &dev_priv->fence_regs[i]; 3213 if (!reg->obj) 3214 return reg; 3215 3216 if (!reg->pin_count) 3217 avail = reg; 3218 } 3219 3220 if (avail == NULL) 3221 return NULL; 3222 3223 /* None available, try to steal one or wait for a user to finish */ 3224 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 3225 if (reg->pin_count) 3226 continue; 3227 3228 return reg; 3229 } 3230 3231 return NULL; 3232 } 3233 3234 /** 3235 * i915_gem_object_get_fence - set up fencing for an object 3236 * @obj: object to map through a fence reg 3237 * 3238 * When mapping objects through the GTT, userspace wants to be able to write 3239 * to them without having to worry about swizzling if the object is tiled. 3240 * This function walks the fence regs looking for a free one for @obj, 3241 * stealing one if it can't find any. 3242 * 3243 * It then sets up the reg based on the object's properties: address, pitch 3244 * and tiling format. 3245 * 3246 * For an untiled surface, this removes any existing fence. 3247 */ 3248 int 3249 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 3250 { 3251 struct drm_device *dev = obj->base.dev; 3252 struct drm_i915_private *dev_priv = dev->dev_private; 3253 bool enable = obj->tiling_mode != I915_TILING_NONE; 3254 struct drm_i915_fence_reg *reg; 3255 int ret; 3256 3257 /* Have we updated the tiling parameters upon the object and so 3258 * will need to serialise the write to the associated fence register? 3259 */ 3260 if (obj->fence_dirty) { 3261 ret = i915_gem_object_flush_fence(obj); 3262 if (ret) 3263 return ret; 3264 } 3265 3266 /* Just update our place in the LRU if our fence is getting reused. 
*/ 3267 if (obj->fence_reg != I915_FENCE_REG_NONE) { 3268 reg = &dev_priv->fence_regs[obj->fence_reg]; 3269 if (!obj->fence_dirty) { 3270 list_move_tail(®->lru_list, 3271 &dev_priv->mm.fence_list); 3272 return 0; 3273 } 3274 } else if (enable) { 3275 reg = i915_find_fence_reg(dev); 3276 if (reg == NULL) 3277 return -EDEADLK; 3278 3279 if (reg->obj) { 3280 struct drm_i915_gem_object *old = reg->obj; 3281 3282 ret = i915_gem_object_flush_fence(old); 3283 if (ret) 3284 return ret; 3285 3286 i915_gem_object_fence_lost(old); 3287 } 3288 } else 3289 return 0; 3290 3291 i915_gem_object_update_fence(obj, reg, enable); 3292 obj->fence_dirty = false; 3293 3294 return 0; 3295 } 3296 3297 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 3298 struct drm_mm_node *gtt_space, 3299 unsigned long cache_level) 3300 { 3301 struct drm_mm_node *other; 3302 3303 /* On non-LLC machines we have to be careful when putting differing 3304 * types of snoopable memory together to avoid the prefetcher 3305 * crossing memory domains and dieing. 3306 */ 3307 if (HAS_LLC(dev)) 3308 return true; 3309 3310 if (gtt_space == NULL) 3311 return true; 3312 3313 if (list_empty(>t_space->node_list)) 3314 return true; 3315 3316 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 3317 if (other->allocated && !other->hole_follows && other->color != cache_level) 3318 return false; 3319 3320 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 3321 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 3322 return false; 3323 3324 return true; 3325 } 3326 3327 static void i915_gem_verify_gtt(struct drm_device *dev) 3328 { 3329 #if WATCH_GTT 3330 struct drm_i915_private *dev_priv = dev->dev_private; 3331 struct drm_i915_gem_object *obj; 3332 int err = 0; 3333 3334 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { 3335 if (obj->gtt_space == NULL) { 3336 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 3337 err++; 3338 continue; 3339 } 3340 3341 if (obj->cache_level != obj->gtt_space->color) { 3342 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 3343 obj->gtt_space->start, 3344 obj->gtt_space->start + obj->gtt_space->size, 3345 obj->cache_level, 3346 obj->gtt_space->color); 3347 err++; 3348 continue; 3349 } 3350 3351 if (!i915_gem_valid_gtt_space(dev, 3352 obj->gtt_space, 3353 obj->cache_level)) { 3354 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 3355 obj->gtt_space->start, 3356 obj->gtt_space->start + obj->gtt_space->size, 3357 obj->cache_level); 3358 err++; 3359 continue; 3360 } 3361 } 3362 3363 WARN_ON(err); 3364 #endif 3365 } 3366 3367 /** 3368 * Finds free space in the GTT aperture and binds the object there. 
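 * On success the object is placed on the bound and inactive lists, and
 * obj->gtt_space, obj->gtt_offset and obj->map_and_fenceable describe the
 * new mapping.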
3369 */ 3370 static int 3371 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 3372 unsigned alignment, 3373 bool map_and_fenceable, 3374 bool nonblocking) 3375 { 3376 struct drm_device *dev = obj->base.dev; 3377 drm_i915_private_t *dev_priv = dev->dev_private; 3378 struct drm_mm_node *node; 3379 u32 size, fence_size, fence_alignment, unfenced_alignment; 3380 bool mappable, fenceable; 3381 int ret; 3382 3383 if (obj->madv != I915_MADV_WILLNEED) { 3384 DRM_ERROR("Attempting to bind a purgeable object\n"); 3385 return -EINVAL; 3386 } 3387 3388 fence_size = i915_gem_get_gtt_size(dev, 3389 obj->base.size, 3390 obj->tiling_mode); 3391 fence_alignment = i915_gem_get_gtt_alignment(dev, 3392 obj->base.size, 3393 obj->tiling_mode); 3394 unfenced_alignment = 3395 i915_gem_get_unfenced_gtt_alignment(dev, 3396 obj->base.size, 3397 obj->tiling_mode); 3398 3399 if (alignment == 0) 3400 alignment = map_and_fenceable ? fence_alignment : 3401 unfenced_alignment; 3402 if (map_and_fenceable && alignment & (fence_alignment - 1)) { 3403 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 3404 return -EINVAL; 3405 } 3406 3407 size = map_and_fenceable ? fence_size : obj->base.size; 3408 3409 /* If the object is bigger than the entire aperture, reject it early 3410 * before evicting everything in a vain attempt to find space. 3411 */ 3412 if (obj->base.size > 3413 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 3414 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 3415 return -E2BIG; 3416 } 3417 3418 ret = i915_gem_object_get_pages(obj); 3419 if (ret) 3420 return ret; 3421 3422 i915_gem_object_pin_pages(obj); 3423 3424 node = kzalloc(sizeof(*node), GFP_KERNEL); 3425 if (node == NULL) { 3426 i915_gem_object_unpin_pages(obj); 3427 return -ENOMEM; 3428 } 3429 3430 search_free: 3431 if (map_and_fenceable) 3432 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, 3433 size, alignment, obj->cache_level, 3434 0, dev_priv->mm.gtt_mappable_end); 3435 else 3436 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, 3437 size, alignment, obj->cache_level); 3438 if (ret) { 3439 ret = i915_gem_evict_something(dev, size, alignment, 3440 obj->cache_level, 3441 map_and_fenceable, 3442 nonblocking); 3443 if (ret == 0) 3444 goto search_free; 3445 3446 i915_gem_object_unpin_pages(obj); 3447 kfree(node); 3448 return ret; 3449 } 3450 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { 3451 i915_gem_object_unpin_pages(obj); 3452 drm_mm_put_block(node); 3453 return -EINVAL; 3454 } 3455 3456 ret = i915_gem_gtt_prepare_object(obj); 3457 if (ret) { 3458 i915_gem_object_unpin_pages(obj); 3459 drm_mm_put_block(node); 3460 return ret; 3461 } 3462 3463 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 3464 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3465 3466 obj->gtt_space = node; 3467 obj->gtt_offset = node->start; 3468 3469 fenceable = 3470 node->size == fence_size && 3471 (node->start & (fence_alignment - 1)) == 0; 3472 3473 mappable = 3474 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 3475 3476 obj->map_and_fenceable = mappable && fenceable; 3477 3478 i915_gem_object_unpin_pages(obj); 3479 trace_i915_gem_object_bind(obj, map_and_fenceable); 3480 i915_gem_verify_gtt(dev); 3481 return 0; 3482 } 3483 3484 void 3485 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 3486 { 3487 /* If we don't have a page list set up, then we're not pinned 3488 * to GPU, and we can 
ignore the cache flush because it'll happen 3489 * again at bind time. 3490 */ 3491 if (obj->pages == NULL) 3492 return; 3493 3494 /* If the GPU is snooping the contents of the CPU cache, 3495 * we do not need to manually clear the CPU cache lines. However, 3496 * the caches are only snooped when the render cache is 3497 * flushed/invalidated. As we always have to emit invalidations 3498 * and flushes when moving into and out of the RENDER domain, correct 3499 * snooping behaviour occurs naturally as the result of our domain 3500 * tracking. 3501 */ 3502 if (obj->cache_level != I915_CACHE_NONE) 3503 return; 3504 3505 trace_i915_gem_object_clflush(obj); 3506 3507 #ifdef __NetBSD__ 3508 drm_clflush_pglist(&obj->igo_pageq); 3509 #else 3510 drm_clflush_sg(obj->pages); 3511 #endif 3512 } 3513 3514 /** Flushes the GTT write domain for the object if it's dirty. */ 3515 static void 3516 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3517 { 3518 uint32_t old_write_domain; 3519 3520 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3521 return; 3522 3523 /* No actual flushing is required for the GTT write domain. Writes 3524 * to it immediately go to main memory as far as we know, so there's 3525 * no chipset flush. It also doesn't land in render cache. 3526 * 3527 * However, we do have to enforce the order so that all writes through 3528 * the GTT land before any writes to the device, such as updates to 3529 * the GATT itself. 3530 */ 3531 wmb(); 3532 3533 old_write_domain = obj->base.write_domain; 3534 obj->base.write_domain = 0; 3535 3536 trace_i915_gem_object_change_domain(obj, 3537 obj->base.read_domains, 3538 old_write_domain); 3539 } 3540 3541 /** Flushes the CPU write domain for the object if it's dirty. */ 3542 static void 3543 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3544 { 3545 uint32_t old_write_domain; 3546 3547 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3548 return; 3549 3550 i915_gem_clflush_object(obj); 3551 i915_gem_chipset_flush(obj->base.dev); 3552 old_write_domain = obj->base.write_domain; 3553 obj->base.write_domain = 0; 3554 3555 trace_i915_gem_object_change_domain(obj, 3556 obj->base.read_domains, 3557 old_write_domain); 3558 } 3559 3560 /** 3561 * Moves a single object to the GTT read, and possibly write domain. 3562 * 3563 * This function returns when the move is complete, including waiting on 3564 * flushes to occur. 3565 */ 3566 int 3567 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3568 { 3569 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3570 uint32_t old_write_domain, old_read_domains; 3571 int ret; 3572 3573 /* Not valid to be called on unbound objects. */ 3574 if (obj->gtt_space == NULL) 3575 return -EINVAL; 3576 3577 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3578 return 0; 3579 3580 ret = i915_gem_object_wait_rendering(obj, !write); 3581 if (ret) 3582 return ret; 3583 3584 i915_gem_object_flush_cpu_write_domain(obj); 3585 3586 old_write_domain = obj->base.write_domain; 3587 old_read_domains = obj->base.read_domains; 3588 3589 /* It should now be out of any other write domains, and we can update 3590 * the domain values for our changes. 
3591 */ 3592 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3593 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3594 if (write) { 3595 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3596 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3597 obj->dirty = 1; 3598 } 3599 3600 trace_i915_gem_object_change_domain(obj, 3601 old_read_domains, 3602 old_write_domain); 3603 3604 /* And bump the LRU for this access */ 3605 if (i915_gem_object_is_inactive(obj)) 3606 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3607 3608 return 0; 3609 } 3610 3611 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3612 enum i915_cache_level cache_level) 3613 { 3614 struct drm_device *dev = obj->base.dev; 3615 drm_i915_private_t *dev_priv = dev->dev_private; 3616 int ret; 3617 3618 if (obj->cache_level == cache_level) 3619 return 0; 3620 3621 if (obj->pin_count) { 3622 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3623 return -EBUSY; 3624 } 3625 3626 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 3627 ret = i915_gem_object_unbind(obj); 3628 if (ret) 3629 return ret; 3630 } 3631 3632 if (obj->gtt_space) { 3633 ret = i915_gem_object_finish_gpu(obj); 3634 if (ret) 3635 return ret; 3636 3637 i915_gem_object_finish_gtt(obj); 3638 3639 /* Before SandyBridge, you could not use tiling or fence 3640 * registers with snooped memory, so relinquish any fences 3641 * currently pointing to our region in the aperture. 3642 */ 3643 if (INTEL_INFO(dev)->gen < 6) { 3644 ret = i915_gem_object_put_fence(obj); 3645 if (ret) 3646 return ret; 3647 } 3648 3649 if (obj->has_global_gtt_mapping) 3650 i915_gem_gtt_bind_object(obj, cache_level); 3651 if (obj->has_aliasing_ppgtt_mapping) 3652 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3653 obj, cache_level); 3654 3655 obj->gtt_space->color = cache_level; 3656 } 3657 3658 if (cache_level == I915_CACHE_NONE) { 3659 u32 old_read_domains, old_write_domain; 3660 3661 /* If we're coming from LLC cached, then we haven't 3662 * actually been tracking whether the data is in the 3663 * CPU cache or not, since we only allow one bit set 3664 * in obj->write_domain and have been skipping the clflushes. 3665 * Just set it to the CPU cache for now. 
3666 */ 3667 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3668 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3669 3670 old_read_domains = obj->base.read_domains; 3671 old_write_domain = obj->base.write_domain; 3672 3673 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3674 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3675 3676 trace_i915_gem_object_change_domain(obj, 3677 old_read_domains, 3678 old_write_domain); 3679 } 3680 3681 obj->cache_level = cache_level; 3682 i915_gem_verify_gtt(dev); 3683 return 0; 3684 } 3685 3686 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3687 struct drm_file *file) 3688 { 3689 struct drm_i915_gem_caching *args = data; 3690 struct drm_i915_gem_object *obj; 3691 int ret; 3692 3693 ret = i915_mutex_lock_interruptible(dev); 3694 if (ret) 3695 return ret; 3696 3697 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3698 if (&obj->base == NULL) { 3699 ret = -ENOENT; 3700 goto unlock; 3701 } 3702 3703 args->caching = obj->cache_level != I915_CACHE_NONE; 3704 3705 drm_gem_object_unreference(&obj->base); 3706 unlock: 3707 mutex_unlock(&dev->struct_mutex); 3708 return ret; 3709 } 3710 3711 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3712 struct drm_file *file) 3713 { 3714 struct drm_i915_gem_caching *args = data; 3715 struct drm_i915_gem_object *obj; 3716 enum i915_cache_level level; 3717 int ret; 3718 3719 switch (args->caching) { 3720 case I915_CACHING_NONE: 3721 level = I915_CACHE_NONE; 3722 break; 3723 case I915_CACHING_CACHED: 3724 level = I915_CACHE_LLC; 3725 break; 3726 default: 3727 return -EINVAL; 3728 } 3729 3730 ret = i915_mutex_lock_interruptible(dev); 3731 if (ret) 3732 return ret; 3733 3734 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3735 if (&obj->base == NULL) { 3736 ret = -ENOENT; 3737 goto unlock; 3738 } 3739 3740 ret = i915_gem_object_set_cache_level(obj, level); 3741 3742 drm_gem_object_unreference(&obj->base); 3743 unlock: 3744 mutex_unlock(&dev->struct_mutex); 3745 return ret; 3746 } 3747 3748 /* 3749 * Prepare buffer for display plane (scanout, cursors, etc). 3750 * Can be called from an uninterruptible phase (modesetting) and allows 3751 * any flushes to be pipelined (for pageflips). 3752 */ 3753 int 3754 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3755 u32 alignment, 3756 struct intel_ring_buffer *pipelined) 3757 { 3758 u32 old_read_domains, old_write_domain; 3759 int ret; 3760 3761 if (pipelined != obj->ring) { 3762 ret = i915_gem_object_sync(obj, pipelined); 3763 if (ret) 3764 return ret; 3765 } 3766 3767 /* The display engine is not coherent with the LLC cache on gen6. As 3768 * a result, we make sure that the pinning that is about to occur is 3769 * done with uncached PTEs. This is lowest common denominator for all 3770 * chipsets. 3771 * 3772 * However for gen6+, we could do better by using the GFDT bit instead 3773 * of uncaching, which would allow us to flush all the LLC-cached data 3774 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3775 */ 3776 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3777 if (ret) 3778 return ret; 3779 3780 /* As the user may map the buffer once pinned in the display plane 3781 * (e.g. libkms for the bootup splash), we have to ensure that we 3782 * always use map_and_fenceable for all scanout buffers. 
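 * (hence map_and_fenceable is passed as true in the pin call below.)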
3783 */ 3784 ret = i915_gem_object_pin(obj, alignment, true, false); 3785 if (ret) 3786 return ret; 3787 3788 i915_gem_object_flush_cpu_write_domain(obj); 3789 3790 old_write_domain = obj->base.write_domain; 3791 old_read_domains = obj->base.read_domains; 3792 3793 /* It should now be out of any other write domains, and we can update 3794 * the domain values for our changes. 3795 */ 3796 obj->base.write_domain = 0; 3797 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3798 3799 trace_i915_gem_object_change_domain(obj, 3800 old_read_domains, 3801 old_write_domain); 3802 3803 return 0; 3804 } 3805 3806 int 3807 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3808 { 3809 int ret; 3810 3811 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3812 return 0; 3813 3814 ret = i915_gem_object_wait_rendering(obj, false); 3815 if (ret) 3816 return ret; 3817 3818 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3819 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3820 return 0; 3821 } 3822 3823 /** 3824 * Moves a single object to the CPU read, and possibly write domain. 3825 * 3826 * This function returns when the move is complete, including waiting on 3827 * flushes to occur. 3828 */ 3829 int 3830 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3831 { 3832 uint32_t old_write_domain, old_read_domains; 3833 int ret; 3834 3835 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3836 return 0; 3837 3838 ret = i915_gem_object_wait_rendering(obj, !write); 3839 if (ret) 3840 return ret; 3841 3842 i915_gem_object_flush_gtt_write_domain(obj); 3843 3844 old_write_domain = obj->base.write_domain; 3845 old_read_domains = obj->base.read_domains; 3846 3847 /* Flush the CPU cache if it's still invalid. */ 3848 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3849 i915_gem_clflush_object(obj); 3850 3851 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3852 } 3853 3854 /* It should now be out of any other write domains, and we can update 3855 * the domain values for our changes. 3856 */ 3857 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3858 3859 /* If we're writing through the CPU, then the GPU read domains will 3860 * need to be invalidated at next use. 3861 */ 3862 if (write) { 3863 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3864 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3865 } 3866 3867 trace_i915_gem_object_change_domain(obj, 3868 old_read_domains, 3869 old_write_domain); 3870 3871 return 0; 3872 } 3873 3874 /* Throttle our rendering by waiting until the ring has completed our requests 3875 * emitted over 20 msec ago. 3876 * 3877 * Note that if we were to use the current jiffies each time around the loop, 3878 * we wouldn't escape the function with any frames outstanding if the time to 3879 * render a frame was over 20ms. 3880 * 3881 * This should get us reasonable parallelism between CPU and GPU but also 3882 * relatively low latency when blocking on a particular request to finish. 
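 * In other words we wait on the most recent of this client's requests that
 * is already more than 20 msec old, if there is one.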
3883 */ 3884 static int 3885 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3886 { 3887 struct drm_i915_private *dev_priv = dev->dev_private; 3888 struct drm_i915_file_private *file_priv = file->driver_priv; 3889 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 3890 struct drm_i915_gem_request *request; 3891 struct intel_ring_buffer *ring = NULL; 3892 u32 seqno = 0; 3893 int ret; 3894 3895 if (atomic_read(&dev_priv->mm.wedged)) 3896 return -EIO; 3897 3898 spin_lock(&file_priv->mm.lock); 3899 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3900 if (time_after_eq(request->emitted_jiffies, recent_enough)) 3901 break; 3902 3903 ring = request->ring; 3904 seqno = request->seqno; 3905 } 3906 spin_unlock(&file_priv->mm.lock); 3907 3908 if (seqno == 0) 3909 return 0; 3910 3911 ret = __wait_seqno(ring, seqno, true, NULL); 3912 if (ret == 0) 3913 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3914 3915 return ret; 3916 } 3917 3918 int 3919 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3920 uint32_t alignment, 3921 bool map_and_fenceable, 3922 bool nonblocking) 3923 { 3924 int ret; 3925 3926 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3927 return -EBUSY; 3928 3929 if (obj->gtt_space != NULL) { 3930 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3931 (map_and_fenceable && !obj->map_and_fenceable)) { 3932 WARN(obj->pin_count, 3933 "bo is already pinned with incorrect alignment:" 3934 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3935 " obj->map_and_fenceable=%d\n", 3936 obj->gtt_offset, alignment, 3937 map_and_fenceable, 3938 obj->map_and_fenceable); 3939 ret = i915_gem_object_unbind(obj); 3940 if (ret) 3941 return ret; 3942 } 3943 } 3944 3945 if (obj->gtt_space == NULL) { 3946 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3947 3948 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3949 map_and_fenceable, 3950 nonblocking); 3951 if (ret) 3952 return ret; 3953 3954 if (!dev_priv->mm.aliasing_ppgtt) 3955 i915_gem_gtt_bind_object(obj, obj->cache_level); 3956 } 3957 3958 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3959 i915_gem_gtt_bind_object(obj, obj->cache_level); 3960 3961 obj->pin_count++; 3962 obj->pin_mappable |= map_and_fenceable; 3963 3964 return 0; 3965 } 3966 3967 void 3968 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3969 { 3970 BUG_ON(obj->pin_count == 0); 3971 BUG_ON(obj->gtt_space == NULL); 3972 3973 if (--obj->pin_count == 0) 3974 obj->pin_mappable = false; 3975 } 3976 3977 int 3978 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3979 struct drm_file *file) 3980 { 3981 struct drm_i915_gem_pin *args = data; 3982 struct drm_i915_gem_object *obj; 3983 int ret; 3984 3985 ret = i915_mutex_lock_interruptible(dev); 3986 if (ret) 3987 return ret; 3988 3989 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3990 if (&obj->base == NULL) { 3991 ret = -ENOENT; 3992 goto unlock; 3993 } 3994 3995 if (obj->madv != I915_MADV_WILLNEED) { 3996 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3997 ret = -EINVAL; 3998 goto out; 3999 } 4000 4001 if (obj->pin_filp != NULL && obj->pin_filp != file) { 4002 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 4003 args->handle); 4004 ret = -EINVAL; 4005 goto out; 4006 } 4007 4008 if (obj->user_pin_count == 0) { 4009 ret = i915_gem_object_pin(obj, args->alignment, true, false); 4010 if (ret) 4011 goto out; 4012 } 4013 4014 obj->user_pin_count++; 4015 obj->pin_filp = 
file; 4016 4017 /* XXX - flush the CPU caches for pinned objects 4018 * as the X server doesn't manage domains yet 4019 */ 4020 i915_gem_object_flush_cpu_write_domain(obj); 4021 args->offset = obj->gtt_offset; 4022 out: 4023 drm_gem_object_unreference(&obj->base); 4024 unlock: 4025 mutex_unlock(&dev->struct_mutex); 4026 return ret; 4027 } 4028 4029 int 4030 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 4031 struct drm_file *file) 4032 { 4033 struct drm_i915_gem_pin *args = data; 4034 struct drm_i915_gem_object *obj; 4035 int ret; 4036 4037 ret = i915_mutex_lock_interruptible(dev); 4038 if (ret) 4039 return ret; 4040 4041 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4042 if (&obj->base == NULL) { 4043 ret = -ENOENT; 4044 goto unlock; 4045 } 4046 4047 if (obj->pin_filp != file) { 4048 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n", 4049 args->handle); 4050 ret = -EINVAL; 4051 goto out; 4052 } 4053 obj->user_pin_count--; 4054 if (obj->user_pin_count == 0) { 4055 obj->pin_filp = NULL; 4056 i915_gem_object_unpin(obj); 4057 } 4058 4059 out: 4060 drm_gem_object_unreference(&obj->base); 4061 unlock: 4062 mutex_unlock(&dev->struct_mutex); 4063 return ret; 4064 } 4065 4066 int 4067 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4068 struct drm_file *file) 4069 { 4070 struct drm_i915_gem_busy *args = data; 4071 struct drm_i915_gem_object *obj; 4072 int ret; 4073 4074 ret = i915_mutex_lock_interruptible(dev); 4075 if (ret) 4076 return ret; 4077 4078 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 4079 if (&obj->base == NULL) { 4080 ret = -ENOENT; 4081 goto unlock; 4082 } 4083 4084 /* Count all active objects as busy, even if they are currently not used 4085 * by the gpu. Users of this interface expect objects to eventually 4086 * become non-busy without any further actions, therefore emit any 4087 * necessary flushes here.
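 *
 * The encoding below: bit 0 of args->busy reports whether the object is still active,
 * and the ring last using it is packed into the upper 16 bits via intel_ring_flag().
 * A caller could decode it roughly as (illustrative only):
 *
 *	still_busy = args.busy & 1;
 *	last_ring  = args.busy >> 16;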
4088 */ 4089 ret = i915_gem_object_flush_active(obj); 4090 4091 args->busy = obj->active; 4092 if (obj->ring) { 4093 BUILD_BUG_ON(I915_NUM_RINGS > 16); 4094 args->busy |= intel_ring_flag(obj->ring) << 16; 4095 } 4096 4097 drm_gem_object_unreference(&obj->base); 4098 unlock: 4099 mutex_unlock(&dev->struct_mutex); 4100 return ret; 4101 } 4102 4103 int 4104 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 4105 struct drm_file *file_priv) 4106 { 4107 return i915_gem_ring_throttle(dev, file_priv); 4108 } 4109 4110 int 4111 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 4112 struct drm_file *file_priv) 4113 { 4114 struct drm_i915_gem_madvise *args = data; 4115 struct drm_i915_gem_object *obj; 4116 int ret; 4117 4118 switch (args->madv) { 4119 case I915_MADV_DONTNEED: 4120 case I915_MADV_WILLNEED: 4121 break; 4122 default: 4123 return -EINVAL; 4124 } 4125 4126 ret = i915_mutex_lock_interruptible(dev); 4127 if (ret) 4128 return ret; 4129 4130 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 4131 if (&obj->base == NULL) { 4132 ret = -ENOENT; 4133 goto unlock; 4134 } 4135 4136 if (obj->pin_count) { 4137 ret = -EINVAL; 4138 goto out; 4139 } 4140 4141 if (obj->madv != __I915_MADV_PURGED) 4142 obj->madv = args->madv; 4143 4144 /* if the object is no longer attached, discard its backing storage */ 4145 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 4146 i915_gem_object_truncate(obj); 4147 4148 args->retained = obj->madv != __I915_MADV_PURGED; 4149 4150 out: 4151 drm_gem_object_unreference(&obj->base); 4152 unlock: 4153 mutex_unlock(&dev->struct_mutex); 4154 return ret; 4155 } 4156 4157 void i915_gem_object_init(struct drm_i915_gem_object *obj, 4158 const struct drm_i915_gem_object_ops *ops) 4159 { 4160 INIT_LIST_HEAD(&obj->mm_list); 4161 INIT_LIST_HEAD(&obj->gtt_list); 4162 INIT_LIST_HEAD(&obj->ring_list); 4163 INIT_LIST_HEAD(&obj->exec_list); 4164 4165 obj->ops = ops; 4166 4167 obj->fence_reg = I915_FENCE_REG_NONE; 4168 obj->madv = I915_MADV_WILLNEED; 4169 /* Avoid an unnecessary call to unbind on the first bind. */ 4170 obj->map_and_fenceable = true; 4171 4172 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 4173 } 4174 4175 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 4176 .get_pages = i915_gem_object_get_pages_gtt, 4177 .put_pages = i915_gem_object_put_pages_gtt, 4178 }; 4179 4180 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 4181 size_t size) 4182 { 4183 struct drm_i915_gem_object *obj; 4184 #ifndef __NetBSD__ /* XXX >32bit dma? */ 4185 struct address_space *mapping; 4186 u32 mask; 4187 #endif 4188 4189 obj = kzalloc(sizeof(*obj), GFP_KERNEL); 4190 if (obj == NULL) 4191 return NULL; 4192 4193 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 4194 kfree(obj); 4195 return NULL; 4196 } 4197 4198 #ifndef __NetBSD__ /* XXX >32bit dma? */ 4199 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 4200 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 4201 /* 965gm cannot relocate objects above 4GiB. 
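 * To keep the backing store below that limit, the code below clears __GFP_HIGHMEM and
 * sets __GFP_DMA32 in the shmem mapping's gfp mask so that pages are allocated from
 * 32-bit-addressable memory.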
*/ 4202 mask &= ~__GFP_HIGHMEM; 4203 mask |= __GFP_DMA32; 4204 } 4205 4206 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4207 mapping_set_gfp_mask(mapping, mask); 4208 #endif 4209 4210 i915_gem_object_init(obj, &i915_gem_object_ops); 4211 4212 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 4213 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 4214 4215 if (HAS_LLC(dev)) { 4216 /* On some devices, we can have the GPU use the LLC (the CPU 4217 * cache) for about a 10% performance improvement 4218 * compared to uncached. Graphics requests other than 4219 * display scanout are coherent with the CPU in 4220 * accessing this cache. This means in this mode we 4221 * don't need to clflush on the CPU side, and on the 4222 * GPU side we only need to flush internal caches to 4223 * get data visible to the CPU. 4224 * 4225 * However, we maintain the display planes as UC, and so 4226 * need to rebind when first used as such. 4227 */ 4228 obj->cache_level = I915_CACHE_LLC; 4229 } else 4230 obj->cache_level = I915_CACHE_NONE; 4231 4232 return obj; 4233 } 4234 4235 int i915_gem_init_object(struct drm_gem_object *obj) 4236 { 4237 BUG(); 4238 4239 return 0; 4240 } 4241 4242 void i915_gem_free_object(struct drm_gem_object *gem_obj) 4243 { 4244 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 4245 struct drm_device *dev = obj->base.dev; 4246 drm_i915_private_t *dev_priv = dev->dev_private; 4247 4248 trace_i915_gem_object_destroy(obj); 4249 4250 if (obj->phys_obj) 4251 i915_gem_detach_phys_object(dev, obj); 4252 4253 obj->pin_count = 0; 4254 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 4255 bool was_interruptible; 4256 4257 was_interruptible = dev_priv->mm.interruptible; 4258 dev_priv->mm.interruptible = false; 4259 4260 WARN_ON(i915_gem_object_unbind(obj)); 4261 4262 dev_priv->mm.interruptible = was_interruptible; 4263 } 4264 4265 obj->pages_pin_count = 0; 4266 i915_gem_object_put_pages(obj); 4267 i915_gem_object_free_mmap_offset(obj); 4268 4269 BUG_ON(obj->pages); 4270 4271 #ifndef __NetBSD__ /* XXX drm prime */ 4272 if (obj->base.import_attach) 4273 drm_prime_gem_destroy(&obj->base, NULL); 4274 #endif 4275 4276 drm_gem_object_release(&obj->base); 4277 i915_gem_info_remove_obj(dev_priv, obj->base.size); 4278 4279 kfree(obj->bit_17); 4280 kfree(obj); 4281 } 4282 4283 int 4284 i915_gem_idle(struct drm_device *dev) 4285 { 4286 drm_i915_private_t *dev_priv = dev->dev_private; 4287 int ret; 4288 4289 mutex_lock(&dev->struct_mutex); 4290 4291 if (dev_priv->mm.suspended) { 4292 mutex_unlock(&dev->struct_mutex); 4293 return 0; 4294 } 4295 4296 ret = i915_gpu_idle(dev); 4297 if (ret) { 4298 mutex_unlock(&dev->struct_mutex); 4299 return ret; 4300 } 4301 i915_gem_retire_requests(dev); 4302 4303 /* Under UMS, be paranoid and evict. */ 4304 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4305 i915_gem_evict_everything(dev); 4306 4307 i915_gem_reset_fences(dev); 4308 4309 /* Hack! Don't let anybody do execbuf while we don't control the chip. 4310 * We need to replace this with a semaphore, or something. 4311 * And not confound mm.suspended! 4312 */ 4313 dev_priv->mm.suspended = 1; 4314 del_timer_sync(&dev_priv->hangcheck_timer); 4315 4316 i915_kernel_lost_context(dev); 4317 i915_gem_cleanup_ringbuffer(dev); 4318 4319 mutex_unlock(&dev->struct_mutex); 4320 4321 /* Cancel the retire work handler, which should be idle now. 
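 * (mm.suspended was set above, so the handler will not re-arm itself; the synchronous
 * cancel is done after struct_mutex is dropped, presumably because the handler may take
 * that lock itself.)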
*/ 4322 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 4323 4324 return 0; 4325 } 4326 4327 void i915_gem_l3_remap(struct drm_device *dev) 4328 { 4329 drm_i915_private_t *dev_priv = dev->dev_private; 4330 u32 misccpctl; 4331 int i; 4332 4333 if (!IS_IVYBRIDGE(dev)) 4334 return; 4335 4336 if (!dev_priv->l3_parity.remap_info) 4337 return; 4338 4339 misccpctl = I915_READ(GEN7_MISCCPCTL); 4340 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 4341 POSTING_READ(GEN7_MISCCPCTL); 4342 4343 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4344 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 4345 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 4346 DRM_DEBUG("0x%x was already programmed to %x\n", 4347 GEN7_L3LOG_BASE + i, remap); 4348 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 4349 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 4350 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 4351 } 4352 4353 /* Make sure all the writes land before disabling dop clock gating */ 4354 POSTING_READ(GEN7_L3LOG_BASE); 4355 4356 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 4357 } 4358 4359 void i915_gem_init_swizzling(struct drm_device *dev) 4360 { 4361 drm_i915_private_t *dev_priv = dev->dev_private; 4362 4363 if (INTEL_INFO(dev)->gen < 5 || 4364 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4365 return; 4366 4367 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4368 DISP_TILE_SURFACE_SWIZZLING); 4369 4370 if (IS_GEN5(dev)) 4371 return; 4372 4373 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4374 if (IS_GEN6(dev)) 4375 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4376 else 4377 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4378 } 4379 4380 static bool 4381 intel_enable_blt(struct drm_device *dev) 4382 { 4383 if (!HAS_BLT(dev)) 4384 return false; 4385 4386 /* The blitter was dysfunctional on early prototypes */ 4387 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4388 DRM_INFO("BLT not supported on this pre-production hardware;" 4389 " graphics performance will be degraded.\n"); 4390 return false; 4391 } 4392 4393 return true; 4394 } 4395 4396 int 4397 i915_gem_init_hw(struct drm_device *dev) 4398 { 4399 drm_i915_private_t *dev_priv = dev->dev_private; 4400 int ret; 4401 4402 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4403 return -EIO; 4404 4405 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 4406 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 4407 4408 i915_gem_l3_remap(dev); 4409 4410 i915_gem_init_swizzling(dev); 4411 4412 ret = intel_init_render_ring_buffer(dev); 4413 if (ret) 4414 return ret; 4415 4416 if (HAS_BSD(dev)) { 4417 ret = intel_init_bsd_ring_buffer(dev); 4418 if (ret) 4419 goto cleanup_render_ring; 4420 } 4421 4422 if (intel_enable_blt(dev)) { 4423 ret = intel_init_blt_ring_buffer(dev); 4424 if (ret) 4425 goto cleanup_bsd_ring; 4426 } 4427 4428 dev_priv->next_seqno = 1; 4429 4430 /* 4431 * XXX: There was some w/a described somewhere suggesting loading 4432 * contexts before PPGTT. 
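 * Hence the ordering below: i915_gem_context_init() runs before i915_gem_init_ppgtt().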
4433 */ 4434 i915_gem_context_init(dev); 4435 i915_gem_init_ppgtt(dev); 4436 4437 return 0; 4438 4439 cleanup_bsd_ring: 4440 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 4441 cleanup_render_ring: 4442 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 4443 return ret; 4444 } 4445 4446 static bool 4447 intel_enable_ppgtt(struct drm_device *dev) 4448 { 4449 #ifdef __NetBSD__ /* XXX ppgtt */ 4450 return false; 4451 #else 4452 if (i915_enable_ppgtt >= 0) 4453 return i915_enable_ppgtt; 4454 4455 #ifdef CONFIG_INTEL_IOMMU 4456 /* Disable ppgtt on SNB if VT-d is on. */ 4457 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) 4458 return false; 4459 #endif 4460 4461 return true; 4462 #endif 4463 } 4464 4465 int i915_gem_init(struct drm_device *dev) 4466 { 4467 struct drm_i915_private *dev_priv = dev->dev_private; 4468 unsigned long gtt_size, mappable_size; 4469 int ret; 4470 4471 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 4472 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 4473 4474 mutex_lock(&dev->struct_mutex); 4475 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 4476 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 4477 * aperture accordingly when using aliasing ppgtt. */ 4478 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 4479 4480 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); 4481 4482 ret = i915_gem_init_aliasing_ppgtt(dev); 4483 if (ret) { 4484 i915_gem_fini_global_gtt(dev); 4485 mutex_unlock(&dev->struct_mutex); 4486 return ret; 4487 } 4488 } else { 4489 /* Let GEM Manage all of the aperture. 4490 * 4491 * However, leave one page at the end still bound to the scratch 4492 * page. There are a number of places where the hardware 4493 * apparently prefetches past the end of the object, and we've 4494 * seen multiple hangs with the GPU head pointer stuck in a 4495 * batchbuffer bound at the last page of the aperture. One page 4496 * should be enough to keep any prefetching inside of the 4497 * aperture. 4498 */ 4499 i915_gem_init_global_gtt(dev, 0, mappable_size, 4500 gtt_size); 4501 } 4502 4503 ret = i915_gem_init_hw(dev); 4504 #ifdef __NetBSD__ /* XXX fini global gtt */ 4505 if (ret) 4506 i915_gem_fini_global_gtt(dev); 4507 #endif 4508 mutex_unlock(&dev->struct_mutex); 4509 if (ret) { 4510 i915_gem_cleanup_aliasing_ppgtt(dev); 4511 return ret; 4512 } 4513 4514 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
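 * (DRI1/UMS only: the flag below is set only when DRIVER_MODESET is not in use.)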
*/ 4515 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4516 dev_priv->dri1.allow_batchbuffer = 1; 4517 return 0; 4518 } 4519 4520 void 4521 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 4522 { 4523 drm_i915_private_t *dev_priv = dev->dev_private; 4524 struct intel_ring_buffer *ring; 4525 int i; 4526 4527 for_each_ring(ring, dev_priv, i) 4528 intel_cleanup_ring_buffer(ring); 4529 } 4530 4531 int 4532 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 4533 struct drm_file *file_priv) 4534 { 4535 drm_i915_private_t *dev_priv = dev->dev_private; 4536 int ret; 4537 4538 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4539 return 0; 4540 4541 if (atomic_read(&dev_priv->mm.wedged)) { 4542 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 4543 atomic_set(&dev_priv->mm.wedged, 0); 4544 } 4545 4546 mutex_lock(&dev->struct_mutex); 4547 dev_priv->mm.suspended = 0; 4548 4549 ret = i915_gem_init_hw(dev); 4550 if (ret != 0) { 4551 mutex_unlock(&dev->struct_mutex); 4552 return ret; 4553 } 4554 4555 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 4556 mutex_unlock(&dev->struct_mutex); 4557 4558 ret = drm_irq_install(dev); 4559 if (ret) 4560 goto cleanup_ringbuffer; 4561 4562 return 0; 4563 4564 cleanup_ringbuffer: 4565 mutex_lock(&dev->struct_mutex); 4566 i915_gem_cleanup_ringbuffer(dev); 4567 dev_priv->mm.suspended = 1; 4568 mutex_unlock(&dev->struct_mutex); 4569 4570 return ret; 4571 } 4572 4573 int 4574 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 4575 struct drm_file *file_priv) 4576 { 4577 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4578 return 0; 4579 4580 drm_irq_uninstall(dev); 4581 return i915_gem_idle(dev); 4582 } 4583 4584 void 4585 i915_gem_lastclose(struct drm_device *dev) 4586 { 4587 int ret; 4588 4589 if (drm_core_check_feature(dev, DRIVER_MODESET)) 4590 return; 4591 4592 ret = i915_gem_idle(dev); 4593 if (ret) 4594 DRM_ERROR("failed to idle hardware: %d\n", ret); 4595 } 4596 4597 static void 4598 init_ring_lists(struct intel_ring_buffer *ring) 4599 { 4600 INIT_LIST_HEAD(&ring->active_list); 4601 INIT_LIST_HEAD(&ring->request_list); 4602 } 4603 4604 void 4605 i915_gem_load(struct drm_device *dev) 4606 { 4607 int i; 4608 drm_i915_private_t *dev_priv = dev->dev_private; 4609 4610 INIT_LIST_HEAD(&dev_priv->mm.active_list); 4611 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 4612 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 4613 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 4614 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4615 for (i = 0; i < I915_NUM_RINGS; i++) 4616 init_ring_lists(&dev_priv->ring[i]); 4617 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 4618 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 4619 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 4620 i915_gem_retire_work_handler); 4621 init_completion(&dev_priv->error_completion); 4622 4623 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 4624 if (IS_GEN3(dev)) { 4625 I915_WRITE(MI_ARB_STATE, 4626 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 4627 } 4628 4629 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4630 4631 /* Old X drivers will take 0-2 for front, back, depth buffers */ 4632 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 4633 dev_priv->fence_reg_start = 3; 4634 4635 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 4636 dev_priv->num_fence_regs = 16; 4637 else 4638 dev_priv->num_fence_regs = 8; 4639 4640 /* Initialize fence registers to zero */ 4641 i915_gem_reset_fences(dev); 4642 4643 i915_gem_detect_bit_6_swizzle(dev); 
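	/*
	 * The pending-flip wait queue needs a native primitive on NetBSD
	 * (DRM_INIT_WAITQUEUE plus its own spin lock); on Linux a plain
	 * waitqueue_head suffices.
	 */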
4644 #ifdef __NetBSD__ 4645 DRM_INIT_WAITQUEUE(&dev_priv->pending_flip_queue, "i915flip"); 4646 spin_lock_init(&dev_priv->pending_flip_lock); 4647 #else 4648 init_waitqueue_head(&dev_priv->pending_flip_queue); 4649 #endif 4650 4651 dev_priv->mm.interruptible = true; 4652 4653 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 4654 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 4655 register_shrinker(&dev_priv->mm.inactive_shrinker); 4656 } 4657 4658 /* 4659 * Create a physically contiguous memory object for this object 4660 * e.g. for cursor + overlay regs 4661 */ 4662 static int i915_gem_init_phys_object(struct drm_device *dev, 4663 int id, int size, int align) 4664 { 4665 drm_i915_private_t *dev_priv = dev->dev_private; 4666 struct drm_i915_gem_phys_object *phys_obj; 4667 int ret; 4668 4669 if (dev_priv->mm.phys_objs[id - 1] || !size) 4670 return 0; 4671 4672 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL); 4673 if (!phys_obj) 4674 return -ENOMEM; 4675 4676 phys_obj->id = id; 4677 4678 phys_obj->handle = drm_pci_alloc(dev, size, align); 4679 if (!phys_obj->handle) { 4680 ret = -ENOMEM; 4681 goto kfree_obj; 4682 } 4683 #ifndef __NetBSD__ /* XXX x86 wc? */ 4684 #ifdef CONFIG_X86 4685 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4686 #endif 4687 #endif 4688 4689 dev_priv->mm.phys_objs[id - 1] = phys_obj; 4690 4691 return 0; 4692 kfree_obj: 4693 kfree(phys_obj); 4694 return ret; 4695 } 4696 4697 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 4698 { 4699 drm_i915_private_t *dev_priv = dev->dev_private; 4700 struct drm_i915_gem_phys_object *phys_obj; 4701 4702 if (!dev_priv->mm.phys_objs[id - 1]) 4703 return; 4704 4705 phys_obj = dev_priv->mm.phys_objs[id - 1]; 4706 if (phys_obj->cur_obj) { 4707 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 4708 } 4709 4710 #ifndef __NetBSD__ /* XXX x86 wb? */ 4711 #ifdef CONFIG_X86 4712 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); 4713 #endif 4714 #endif 4715 drm_pci_free(dev, phys_obj->handle); 4716 kfree(phys_obj); 4717 dev_priv->mm.phys_objs[id - 1] = NULL; 4718 } 4719 4720 void i915_gem_free_all_phys_object(struct drm_device *dev) 4721 { 4722 int i; 4723 4724 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 4725 i915_gem_free_phys_object(dev, i); 4726 } 4727 4728 void i915_gem_detach_phys_object(struct drm_device *dev, 4729 struct drm_i915_gem_object *obj) 4730 { 4731 #ifndef __NetBSD__ 4732 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4733 #endif 4734 char *vaddr; 4735 int i; 4736 int page_count; 4737 4738 if (!obj->phys_obj) 4739 return; 4740 vaddr = obj->phys_obj->handle->vaddr; 4741 4742 page_count = obj->base.size / PAGE_SIZE; 4743 for (i = 0; i < page_count; i++) { 4744 #ifdef __NetBSD__ 4745 /* XXX Just use ubc_uiomove? 
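 * Until then, the NetBSD path below wires each page of the backing uao with
 * uvm_obj_wirepages(), copies the phys object's contents back with kmap_atomic()/memcpy(),
 * clflushes the page and clears PG_CLEAN to mark it dirty, then unwires it.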
*/ 4746 struct pglist pages; 4747 int error; 4748 4749 TAILQ_INIT(&pages); 4750 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4751 (i+1)*PAGE_SIZE, &pages); 4752 if (error) { 4753 printf("unable to map page %d of i915 gem obj: %d\n", 4754 i, error); 4755 continue; 4756 } 4757 4758 KASSERT(!TAILQ_EMPTY(&pages)); 4759 struct vm_page *const page = TAILQ_FIRST(&pages); 4760 TAILQ_REMOVE(&pages, page, pageq.queue); 4761 KASSERT(TAILQ_EMPTY(&pages)); 4762 4763 char *const dst = kmap_atomic(container_of(page, struct page, 4764 p_vmp)); 4765 (void)memcpy(dst, vaddr + (i*PAGE_SIZE), PAGE_SIZE); 4766 kunmap_atomic(dst); 4767 4768 drm_clflush_page(container_of(page, struct page, p_vmp)); 4769 page->flags &= ~PG_CLEAN; 4770 /* XXX mark page accessed */ 4771 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4772 (i+1)*PAGE_SIZE); 4773 #else 4774 struct page *page = shmem_read_mapping_page(mapping, i); 4775 if (!IS_ERR(page)) { 4776 char *dst = kmap_atomic(page); 4777 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 4778 kunmap_atomic(dst); 4779 4780 drm_clflush_pages(&page, 1); 4781 4782 set_page_dirty(page); 4783 mark_page_accessed(page); 4784 page_cache_release(page); 4785 } 4786 #endif 4787 } 4788 i915_gem_chipset_flush(dev); 4789 4790 obj->phys_obj->cur_obj = NULL; 4791 obj->phys_obj = NULL; 4792 } 4793 4794 int 4795 i915_gem_attach_phys_object(struct drm_device *dev, 4796 struct drm_i915_gem_object *obj, 4797 int id, 4798 int align) 4799 { 4800 #ifndef __NetBSD__ 4801 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 4802 #endif 4803 drm_i915_private_t *dev_priv = dev->dev_private; 4804 int ret = 0; 4805 int page_count; 4806 int i; 4807 4808 if (id > I915_MAX_PHYS_OBJECT) 4809 return -EINVAL; 4810 4811 if (obj->phys_obj) { 4812 if (obj->phys_obj->id == id) 4813 return 0; 4814 i915_gem_detach_phys_object(dev, obj); 4815 } 4816 4817 /* create a new object */ 4818 if (!dev_priv->mm.phys_objs[id - 1]) { 4819 ret = i915_gem_init_phys_object(dev, id, 4820 obj->base.size, align); 4821 if (ret) { 4822 DRM_ERROR("failed to init phys object %d size: %zu\n", 4823 id, obj->base.size); 4824 return ret; 4825 } 4826 } 4827 4828 /* bind to the object */ 4829 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 4830 obj->phys_obj->cur_obj = obj; 4831 4832 page_count = obj->base.size / PAGE_SIZE; 4833 4834 for (i = 0; i < page_count; i++) { 4835 #ifdef __NetBSD__ 4836 char *const vaddr = obj->phys_obj->handle->vaddr; 4837 struct pglist pages; 4838 int error; 4839 4840 TAILQ_INIT(&pages); 4841 error = uvm_obj_wirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4842 (i+1)*PAGE_SIZE, &pages); 4843 if (error) 4844 /* XXX errno NetBSD->Linux */ 4845 return -error; 4846 4847 KASSERT(!TAILQ_EMPTY(&pages)); 4848 struct vm_page *const page = TAILQ_FIRST(&pages); 4849 TAILQ_REMOVE(&pages, page, pageq.queue); 4850 KASSERT(TAILQ_EMPTY(&pages)); 4851 4852 char *const src = kmap_atomic(container_of(page, struct page, 4853 p_vmp)); 4854 (void)memcpy(vaddr + (i*PAGE_SIZE), src, PAGE_SIZE); 4855 kunmap_atomic(src); 4856 4857 /* XXX mark page accessed */ 4858 uvm_obj_unwirepages(obj->base.gemo_shm_uao, i*PAGE_SIZE, 4859 (i+1)*PAGE_SIZE); 4860 #else 4861 struct page *page; 4862 char *dst, *src; 4863 4864 page = shmem_read_mapping_page(mapping, i); 4865 if (IS_ERR(page)) 4866 return PTR_ERR(page); 4867 4868 src = kmap_atomic(page); 4869 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 4870 memcpy(dst, src, PAGE_SIZE); 4871 kunmap_atomic(src); 4872 4873 mark_page_accessed(page); 4874 
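		/* Drop the reference taken by shmem_read_mapping_page(). */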
page_cache_release(page); 4875 #endif 4876 } 4877 4878 return 0; 4879 } 4880 4881 static int 4882 i915_gem_phys_pwrite(struct drm_device *dev, 4883 struct drm_i915_gem_object *obj, 4884 struct drm_i915_gem_pwrite *args, 4885 struct drm_file *file_priv) 4886 { 4887 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 4888 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 4889 4890 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { 4891 unsigned long unwritten; 4892 4893 /* The physical object once assigned is fixed for the lifetime 4894 * of the obj, so we can safely drop the lock and continue 4895 * to access vaddr. 4896 */ 4897 mutex_unlock(&dev->struct_mutex); 4898 unwritten = copy_from_user(vaddr, user_data, args->size); 4899 mutex_lock(&dev->struct_mutex); 4900 if (unwritten) 4901 return -EFAULT; 4902 } 4903 4904 i915_gem_chipset_flush(dev); 4905 return 0; 4906 } 4907 4908 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 4909 { 4910 struct drm_i915_file_private *file_priv = file->driver_priv; 4911 4912 /* Clean up our request list when the client is going away, so that 4913 * later retire_requests won't dereference our soon-to-be-gone 4914 * file_priv. 4915 */ 4916 spin_lock(&file_priv->mm.lock); 4917 while (!list_empty(&file_priv->mm.request_list)) { 4918 struct drm_i915_gem_request *request; 4919 4920 request = list_first_entry(&file_priv->mm.request_list, 4921 struct drm_i915_gem_request, 4922 client_list); 4923 list_del(&request->client_list); 4924 request->file_priv = NULL; 4925 } 4926 spin_unlock(&file_priv->mm.lock); 4927 } 4928 4929 #ifndef __NetBSD__ /* XXX */ 4930 static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) 4931 { 4932 if (!mutex_is_locked(mutex)) 4933 return false; 4934 4935 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) 4936 return mutex->owner == task; 4937 #else 4938 /* Since UP may be pre-empted, we cannot assume that we own the lock */ 4939 return false; 4940 #endif 4941 } 4942 #endif 4943 4944 static int 4945 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) 4946 { 4947 #ifdef __NetBSD__ /* XXX shrinkers */ 4948 return 0; 4949 #else 4950 struct drm_i915_private *dev_priv = 4951 container_of(shrinker, 4952 struct drm_i915_private, 4953 mm.inactive_shrinker); 4954 struct drm_device *dev = dev_priv->dev; 4955 struct drm_i915_gem_object *obj; 4956 int nr_to_scan = sc->nr_to_scan; 4957 bool unlock = true; 4958 int cnt; 4959 4960 if (!mutex_trylock(&dev->struct_mutex)) { 4961 if (!mutex_is_locked_by(&dev->struct_mutex, current)) 4962 return 0; 4963 4964 if (dev_priv->mm.shrinker_no_lock_stealing) 4965 return 0; 4966 4967 unlock = false; 4968 } 4969 4970 if (nr_to_scan) { 4971 nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan); 4972 if (nr_to_scan > 0) 4973 nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan, 4974 false); 4975 if (nr_to_scan > 0) 4976 i915_gem_shrink_all(dev_priv); 4977 } 4978 4979 cnt = 0; 4980 list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list) 4981 if (obj->pages_pin_count == 0) 4982 cnt += obj->base.size >> PAGE_SHIFT; 4983 list_for_each_entry(obj, &dev_priv->mm.inactive_list, gtt_list) 4984 if (obj->pin_count == 0 && obj->pages_pin_count == 0) 4985 cnt += obj->base.size >> PAGE_SHIFT; 4986 4987 if (unlock) 4988 mutex_unlock(&dev->struct_mutex); 4989 return cnt; 4990 #endif 4991 } 4992