/*	$OpenBSD: i915_gem.c,v 1.74 2014/07/12 18:48:52 tedu Exp $	*/
/*
 * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <dev/pci/drm/drmP.h>
#include <dev/pci/drm/drm.h>
#include <dev/pci/drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

#include <machine/pmap.h>

#include <sys/queue.h>
#include <sys/task.h>
#include <sys/time.h>

static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable,
						    bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file);

static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable);

#ifdef notyet
static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    struct shrink_control *sc);
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
#endif
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);

static inline int timespec_to_jiffies(const struct timespec *);
static inline int timespec_valid(const struct timespec *);
static struct timespec ns_to_timespec(const int64_t);
static inline int64_t timespec_to_ns(const struct timespec *);

extern int ticks;

static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	/* As we do not have an associated fence register, we will force
	 * a tiling change if we ever need to acquire one.
	 */
	obj->fence_dirty = false;
	obj->fence_reg = I915_FENCE_REG_NONE;
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

static int
i915_gem_wait_for_error(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	mtx_enter(&dev_priv->error_completion_lock);
	while (dev_priv->error_completion == 0) {
		ret = -msleep(&dev_priv->error_completion,
		    &dev_priv->error_completion_lock, PCATCH, "915wco", 10*hz);
		if (ret) {
			mtx_leave(&dev_priv->error_completion_lock);
			return ret;
		}
	}
	mtx_leave(&dev_priv->error_completion_lock);

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
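		 * (Descriptive note: each waiter effectively consumes one
		 * completion token, so restoring it here keeps the count
		 * balanced for any later waiters.)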
		 */
		mtx_enter(&dev_priv->error_completion_lock);
		dev_priv->error_completion++;
		mtx_leave(&dev_priv->error_completion_lock);
	}
	return 0;
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	int ret;

	ret = i915_gem_wait_for_error(dev);
	if (ret)
		return ret;

	ret = -rw_enter(&dev->dev_lock, RW_WRITE | RW_INTR);
	if (ret)
		return ret;

	WARN_ON(i915_verify_lists(dev));
	return 0;
}

static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
{
	return obj->gtt_space && !obj->active;
}

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_init *args = data;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return -ENODEV;

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;

	/* GEM with user mode setting was never supported on ilk and later. */
	if (INTEL_INFO(dev)->gen >= 5)
		return -ENODEV;

	DRM_LOCK();
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
	DRM_UNLOCK();

	return 0;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_get_aperture *args = data;
	struct drm_i915_gem_object *obj;
	size_t pinned;

	pinned = 0;
	DRM_LOCK();
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count)
			pinned += obj->gtt_space->size;
	DRM_UNLOCK();

	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = round_page(size);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	if (ret) {
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
		pool_put(&dev->objpl, obj);
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(&obj->base);
	trace_i915_gem_object_create(obj);

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
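 *
 * (Descriptive note: the requested size is rounded up to page granularity
 * before the backing object is allocated; the handle returned here is what
 * userspace passes to the other GEM ioctls to refer to the object.)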
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;

	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj->tiling_mode != I915_TILING_NONE;
}

#define offset_in_page(off) ((off) & PAGE_MASK)

static void *
kmap(struct vm_page *pg)
{
	vaddr_t va;

#if defined (__HAVE_PMAP_DIRECT)
	va = pmap_map_direct(pg);
#else
	va = uvm_km_valloc_wait(phys_map, PAGE_SIZE);
	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
	pmap_update(pmap_kernel());
#endif
	return (void *)va;
}

static void
kunmap(void *addr)
{
	vaddr_t va = (vaddr_t)addr;

#if defined (__HAVE_PMAP_DIRECT)
	pmap_unmap_direct(va);
#else
	pmap_kremove(va, PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free_wakeup(phys_map, va, PAGE_SIZE);
#endif
}

static inline void
drm_clflush_virt_range(void *addr, size_t len)
{
	pmap_flush_cache((vaddr_t)addr, len);
}

static inline unsigned long
__copy_to_user(void *to, const void *from, unsigned len)
{
	if (copyout(from, to, len))
		return len;
	return 0;
}

static inline unsigned long
__copy_to_user_inatomic(void *to, const void *from, unsigned len)
{
	struct cpu_info *ci = curcpu();
	int error;

	ci->ci_inatomic = 1;
	error = copyout(from, to, len);
	ci->ci_inatomic = 0;

	return (error ? len : 0);
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline unsigned long
__copy_from_user(void *to, const void *from, unsigned len)
{
	if (copyin(from, to, len))
		return len;
	return 0;
}

static inline unsigned long
__copy_from_user_inatomic_nocache(void *to, const void *from, unsigned len)
{
	struct cpu_info *ci = curcpu();
	int error;

	ci->ci_inatomic = 1;
	error = copyin(from, to, len);
	ci->ci_inatomic = 0;

	return (error ? len : 0);
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/* Per-page copy function for the shmem pread fastpath.
 * Flushes invalid cachelines before reading the target if
 * needs_clflush is set. */
static int
shmem_pread_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_to_user_inatomic(user_data,
				      vaddr + shmem_page_offset,
				      page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

#define round_up(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#define round_down(x, y) (((x) / (y)) * (y))

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data,
					      vaddr, shmem_page_offset,
					      page_length);
	else
		ret = __copy_to_user(user_data,
				     vaddr + shmem_page_offset,
				     page_length);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
{
	char __user *user_data;
	ssize_t remain;
	off_t offset;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush = 0;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, false);
			if (ret)
				return ret;
		}
	}

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

#ifdef __linux__
		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;
#else
		page = obj->pages[i];
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
#endif

		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		DRM_UNLOCK();

#ifdef __linux__
		if (!prefaulted) {
			ret = fault_in_multipages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}
#endif

		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
				       user_data, page_do_bit17_swizzling,
				       needs_clflush);

		DRM_LOCK();

next_page:
#ifdef __linux__
		mark_page_accessed(page);
#endif

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
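 *
 * (Descriptive note: the bounds check in the ioctl below deliberately
 * compares args->size against obj->base.size - args->offset so the
 * offset+size sum can never overflow.)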
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check source. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_shmem_pread(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

#ifdef __linux__
/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	void __iomem *vaddr_atomic;
	void *vaddr;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
	/* We can use the cpu mem copy function because this is X86. */
	vaddr = (void __force*)vaddr_atomic + page_offset;
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic);
	return unwritten;
}
#endif

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	bus_space_handle_t bsh;
	bus_addr_t offset;
	bus_size_t size;
	char *vaddr;
	int ret;

	ret = i915_gem_object_pin(obj, 0, true, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;

	offset = obj->gtt_offset + args->offset;
	size = round_page(offset + args->size) - trunc_page(offset);

	if ((ret = agp_map_subregion(dev_priv->agph,
	    trunc_page(offset), size, &bsh)) != 0)
		goto out_unpin;
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	if (vaddr == NULL) {
		ret = -EFAULT;
		goto out_unmap;
	}

	ret = -copyin((char *)(uintptr_t)args->data_ptr,
	    vaddr + (offset & PAGE_MASK), args->size);

out_unmap:
	agp_unmap_subregion(dev_priv->agph, bsh, size);

out_unpin:
	i915_gem_object_unpin(obj);
out:
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set. */
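/* Descriptive note: the fast path copies under an atomic kmap, so the user
 * copy must not fault or sleep, and bit17-swizzled pages are rejected up
 * front; both cases fall back to shmem_pwrite_slow() below. */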
static int
shmem_pwrite_fast(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	if (unlikely(page_do_bit17_swizzling))
		return -EINVAL;

	vaddr = kmap_atomic(page);
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
						user_data,
						page_length);
	if (needs_clflush_after)
		drm_clflush_virt_range(vaddr + shmem_page_offset,
				       page_length);
	kunmap_atomic(vaddr);

	return ret ? -EFAULT : 0;
}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pwrite_slow(struct vm_page *page, int shmem_page_offset, int page_length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
						user_data,
						page_length);
	else
		ret = __copy_from_user(vaddr + shmem_page_offset,
				       user_data,
				       page_length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
					     page_length,
					     page_do_bit17_swizzling);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}

static int
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
{
	ssize_t remain;
	off_t offset;
	char __user *user_data;
	int shmem_page_offset, page_length, ret = 0;
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
	int hit_slowpath = 0;
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
	int i;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		if (obj->gtt_space) {
			ret = i915_gem_object_set_to_gtt_domain(obj, true);
			if (ret)
				return ret;
		}
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing. */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);

	offset = args->offset;
	obj->dirty = 1;

	for (i = 0; i < (obj->base.size >> PAGE_SHIFT); i++) {
		struct vm_page *page;
		int partial_cacheline_write;

		if (i < offset >> PAGE_SHIFT)
			continue;

		if (remain <= 0)
			break;

		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
		shmem_page_offset = offset_in_page(offset);

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire page. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (curcpu()->ci_cflushsz - 1));

#ifdef __linux__
		page = sg_page(sg);
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;
#else
		page = obj->pages[i];
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(VM_PAGE_TO_PHYS(page) & (1 << 17)) != 0;
#endif

		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);
		if (ret == 0)
			goto next_page;

		hit_slowpath = 1;
		DRM_UNLOCK();
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
					user_data, page_do_bit17_swizzling,
					partial_cacheline_write,
					needs_clflush_after);

		DRM_LOCK();

next_page:
#ifdef __linux__
		set_page_dirty(page);
		mark_page_accessed(page);
#else
		atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
#endif

		if (ret)
			goto out;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

out:
	i915_gem_object_unpin_pages(obj);

	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			i915_gem_chipset_flush(dev);
		}
	}

	if (needs_clflush_after)
		i915_gem_chipset_flush(dev);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Bounds check destination. */
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj->phys_obj) {
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
		goto out;
	}

	if (obj->cache_level == I915_CACHE_NONE &&
	    obj->tiling_mode == I915_TILING_NONE &&
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
	}

	if (ret == -EFAULT || ret == -ENOSPC)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

int
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
		     bool interruptible)
{
	if (atomic_read(&dev_priv->mm.wedged)) {
		bool recovery_complete;

		/* Give the error handler a chance to run. */
		mtx_enter(&dev_priv->error_completion_lock);
		recovery_complete = dev_priv->error_completion > 0;
		mtx_leave(&dev_priv->error_completion_lock);

		/* Non-interruptible callers can't handle -EAGAIN, hence return
		 * -EIO unconditionally for these. */
		if (!interruptible)
			return -EIO;

		/* Recovery complete, but still wedged means reset failure. */
		if (recovery_complete)
			return -EIO;

		return -EAGAIN;
	}

	return 0;
}

/*
 * Compare seqno against outstanding lazy request. Emit a request if they are
 * equal.
 */
static int
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	// BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));

	ret = 0;
	if (seqno == ring->outstanding_lazy_request)
		ret = i915_add_request(ring, NULL, NULL);

	return ret;
}

/**
 * __wait_seqno - wait until execution of seqno has finished
 * @ring: the ring expected to report seqno
 * @seqno: duh!
 * @interruptible: do an interruptible wait (normally yes)
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
 *
 * Returns 0 if the seqno was found within the allotted time. Else returns the
 * errno with remaining time filled in timeout argument.
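 *
 * (Descriptive note: the wait is a series of bounded sleeps under the
 * interrupt lock; the time already slept is subtracted from *timeout on the
 * way out, so a caller interrupted by a signal still learns how much time
 * remained.)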
 */
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
			bool interruptible, struct timespec *timeout)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct timespec before, now, wait_time={1,0};
	struct timespec sleep_time;
	unsigned long timeout_jiffies;
	long end;
	bool wait_forever = true;
	int ret;

	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return 0;

	trace_i915_gem_request_wait_begin(ring, seqno);

	if (timeout != NULL) {
		wait_time = *timeout;
		wait_forever = false;
	}

	timeout_jiffies = timespec_to_jiffies(&wait_time);

	if (WARN_ON(!ring->irq_get(ring)))
		return -ENODEV;

	/* Record current time in case interrupted by signal, or wedged */
	nanouptime(&before);

#define EXIT_COND \
	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
	atomic_read(&dev_priv->mm.wedged))
	do {
		end = timeout_jiffies;
		mtx_enter(&dev_priv->irq_lock);
		do {
			if (EXIT_COND) {
				ret = 0;
				break;
			}
			ret = msleep(ring, &dev_priv->irq_lock,
			    PZERO | (interruptible ? PCATCH : 0),
			    "gemwt", end);
			nanouptime(&now);
			timespecsub(&now, &before, &sleep_time);
			if (timespeccmp(&sleep_time, &wait_time, >=)) {
				end = 0;
				break;
			}
			end = timeout_jiffies -
			    timespec_to_jiffies(&sleep_time);
		} while (ret == 0);
		mtx_leave(&dev_priv->irq_lock);
		switch (ret) {
		case 0:
			break;
		case ERESTART:
			end = -ERESTARTSYS;
			break;
		case EWOULDBLOCK:
			end = 0;
			break;
		default:
			end = -ret;
			break;
		}

		ret = i915_gem_check_wedge(dev_priv, interruptible);
		if (ret)
			end = ret;
	} while (end == 0 && wait_forever);

	nanouptime(&now);

	ring->irq_put(ring);
	trace_i915_gem_request_wait_end(ring, seqno);
#undef EXIT_COND

	if (timeout) {
		timespecsub(&now, &before, &sleep_time);
		timespecsub(timeout, &sleep_time, timeout);
	}

	switch (end) {
	case -EIO:
	case -EAGAIN: /* Wedged */
	case -ERESTARTSYS: /* Signal */
		return (int)end;
	case 0: /* Timeout */
		if (timeout)
			timeout->tv_sec = timeout->tv_nsec = 0;
		return -ETIMEDOUT;
	default: /* Completed */
		WARN_ON(end < 0); /* We're not aware of other errors */
		return 0;
	}
}

/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
int
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible = dev_priv->mm.interruptible;
	int ret;

	// BUG_ON(!mutex_is_locked(&dev->struct_mutex));
	BUG_ON(seqno == 0);

	ret = i915_gem_check_wedge(dev_priv, interruptible);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	return __wait_seqno(ring, seqno, interruptible, NULL);
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
			       bool readonly)
{
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return 0;
}

/* A nonblocking variant of the above wait. This is a highly dangerous routine
 * as the object state may change during this call.
 */
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
					    bool readonly)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = obj->ring;
	u32 seqno;
	int ret;

	rw_assert_wrlock(&dev->dev_lock);
	BUG_ON(!dev_priv->mm.interruptible);

	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
	if (seqno == 0)
		return 0;

	ret = i915_gem_check_wedge(dev_priv, true);
	if (ret)
		return ret;

	ret = i915_gem_check_olr(ring, seqno);
	if (ret)
		return ret;

	DRM_UNLOCK();
	ret = __wait_seqno(ring, seqno, true, NULL);
	DRM_LOCK();

	i915_gem_retire_requests_ring(ring);

	/* Manually manage the write flush as we may have not yet
	 * retired the buffer.
	 */
	if (ret == 0 &&
	    obj->last_write_seqno &&
	    i915_seqno_passed(seqno, obj->last_write_seqno)) {
		obj->last_write_seqno = 0;
		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
	}

	return ret;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
	if (ret)
		goto unref;

	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

unref:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;
	int ret = 0;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	vaddr_t addr;
	vsize_t size;
	int ret;

	size = round_page(args->size);
	if (size == 0)
		return -EINVAL;

	if (args->offset + size < args->offset)
		return -EINVAL;
	if (args->offset & PAGE_MASK)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL)
		return -ENOENT;

	addr = 0;
	ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size,
	    obj->uao, args->offset, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
	    UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
	if (ret == 0)
		uao_reference(obj->uao);
	drm_gem_object_unreference_unlocked(obj);
	if (ret)
		return ret;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

int
i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi,
    off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx,
    vm_prot_t access_type, int flags)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	paddr_t paddr;
	int lcv, ret;
	int write = !!(access_type & VM_PROT_WRITE);
	vm_prot_t mapprot;
	boolean_t locked = TRUE;

	dev_priv->entries++;

	/*
	 * If we already own the lock, we must be doing a copyin or
	 * copyout in one of the fast paths.  Return failure such that
	 * we fall back on the slow path.
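	 * (Descriptive note: taking this fault while the thread already holds
	 * the DRM lock would deadlock; the pread/pwrite slow paths instead
	 * drop the lock before touching user memory.)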
	 */
	if (!obj->base.map || RWLOCK_OWNER(&dev->dev_lock) == curproc) {
		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
		    &obj->base.uobj, NULL);
		dev_priv->entries--;
		return (VM_PAGER_BAD);
	}

	offset -= obj->base.map->ext;

	if (rw_enter(&dev->dev_lock, RW_NOSLEEP | RW_WRITE) != 0) {
		uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL);
		DRM_LOCK();
		locked = uvmfault_relock(ufi);
	}
	if (!locked) {
		DRM_UNLOCK();
		dev_priv->entries--;
		return (VM_PAGER_REFAULT);
	}

	/* Now bind it into the GTT if needed */
	ret = i915_gem_object_pin(obj, 0, true, false);
	if (ret)
		goto unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, write);
	if (ret)
		goto unpin;

	ret = i915_gem_object_get_fence(obj);
	if (ret)
		goto unpin;

	obj->fault_mappable = true;

	mapprot = ufi->entry->protection;
	/*
	 * if it's only a read fault, we only put ourselves into the gtt
	 * read domain, so make sure we fault again and set ourselves to write.
	 * this prevents us needing userland to do domain management and get
	 * it wrong, and makes us fully coherent with the gpu re mmap.
	 */
	if (write == 0)
		mapprot &= ~VM_PROT_WRITE;
	/* XXX try and be more efficient when we do this */
	for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE,
	    vaddr += PAGE_SIZE) {
		if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
			continue;

		if (pps[lcv] == PGO_DONTCARE)
			continue;

		paddr = dev_priv->mm.gtt_base_addr + obj->gtt_offset + offset;

		if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
		    mapprot, PMAP_CANFAIL | mapprot) != 0) {
			i915_gem_object_unpin(obj);
			uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
			    NULL, NULL);
			DRM_UNLOCK();
			dev_priv->entries--;
			pmap_update(ufi->orig_map->pmap);
			uvm_wait("intelflt");
			return (VM_PAGER_REFAULT);
		}
	}
unpin:
	i915_gem_object_unpin(obj);
unlock:
	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL);
	DRM_UNLOCK();
	dev_priv->entries--;
	pmap_update(ufi->orig_map->pmap);

	switch (ret) {
	case -EIO:
		/* If this -EIO is due to a gpu hang, give the reset code a
		 * chance to clean up the mess. Otherwise return the proper
		 * SIGBUS. */
		if (!atomic_read(&dev_priv->mm.wedged))
			return VM_PAGER_ERROR;
	case -EAGAIN:
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EAGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
#if 0
		set_need_resched();
#endif
	case 0:
	case -ERESTART:
	case -EINTR:
	case -EBUSY:
		/*
		 * EBUSY is ok: this just means that another thread
		 * already did the job.
		 */
		return VM_PAGER_OK;
	case -ENOMEM:
		return VM_PAGER_ERROR;
	case -ENOSPC:
		return VM_PAGER_ERROR;
	default:
		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
		return VM_PAGER_ERROR;
	}
}

/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
 * Preserve the reservation of the mmapping with the DRM core code, but
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, then pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
void
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
{
	struct inteldrm_softc *dev_priv = obj->base.dev->dev_private;
	struct vm_page *pg;

	if (!obj->fault_mappable)
		return;

	for (pg = &dev_priv->pgs[atop(obj->gtt_offset)];
	     pg != &dev_priv->pgs[atop(obj->gtt_offset + obj->base.size)];
	     pg++)
		pmap_page_protect(pg, VM_PROT_NONE);

	obj->fault_mappable = false;
}

static uint32_t
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
{
	uint32_t gtt_size;

	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return size;

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
		gtt_size = 1024*1024;
	else
		gtt_size = 512*1024;

	while (gtt_size < size)
		gtt_size <<= 1;

	return gtt_size;
}

/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
	if (INTEL_INFO(dev)->gen >= 4 ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
	    tiling_mode == I915_TILING_NONE)
		return 4096;

	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-tile object size.
	 */
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
}

static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
{
#if 0
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
#endif
	int ret;

	if (obj->base.map)
		return 0;

#if 0
	dev_priv->mm.shrinker_no_lock_stealing = true;
#endif

	ret = drm_gem_create_mmap_offset(&obj->base);
#if 0
	if (ret != -ENOSPC)
		goto out;

	/* Badly fragmented mmap space? The only way we can recover
	 * space is by destroying unwanted objects. We can't randomly release
	 * mmap_offsets as userspace expects them to be persistent for the
	 * lifetime of the objects. The closest we can do is to release the
	 * offsets on purgeable objects by truncating it and marking it purged,
	 * which prevents userspace from ever using that object again.
	 */
	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
	ret = drm_gem_create_mmap_offset(&obj->base);
	if (ret != -ENOSPC)
		goto out;

	i915_gem_shrink_all(dev_priv);
	ret = drm_gem_create_mmap_offset(&obj->base);
out:
	dev_priv->mm.shrinker_no_lock_stealing = false;
#endif

	return ret;
}

static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
{
	if (!obj->base.map)
		return;

	drm_gem_free_mmap_offset(&obj->base);
}

int
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
	if (&obj->base == NULL) {
		ret = -ENOENT;
		goto unlock;
	}

	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto out;
	}

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
		ret = -EINVAL;
		goto out;
	}

	ret = i915_gem_object_create_mmap_offset(obj);
	if (ret)
		goto out;

	*offset = (u64)obj->base.map->ext;

out:
	drm_gem_object_unreference(&obj->base);
unlock:
	DRM_UNLOCK();
	return ret;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
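 *
 * (Descriptive note: userspace typically passes the returned fake offset
 * straight to mmap(2) on the DRM file descriptor to obtain the GTT-backed
 * mapping.)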
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}

/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
{
	i915_gem_object_free_mmap_offset(obj);

	obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size,
	    PGO_ALLPAGES | PGO_FREE);

	obj->madv = __I915_MADV_PURGED;
}

// i915_gem_object_is_purgeable

static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
{
	int page_count = obj->base.size / PAGE_SIZE;
#ifdef __linux__
	struct scatterlist *sg;
#endif
	int ret, i;

	BUG_ON(obj->madv == __I915_MADV_PURGED);

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		WARN_ON(ret != -EIO);
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_save_bit_17_swizzle(obj);

	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

#ifdef __linux__
	for_each_sg(obj->pages->sgl, sg, page_count, i) {
		struct page *page = sg_page(sg);

		if (obj->dirty)
			set_page_dirty(page);

		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(page);

		page_cache_release(page);
	}
#else
	for (i = 0; i < page_count; i++) {
		struct vm_page *page = obj->pages[i];

		if (obj->dirty)
			atomic_clearbits_int(&page->pg_flags, PG_CLEAN);
	}
	uvm_objunwire(obj->base.uao, 0, obj->base.size);
#endif
	obj->dirty = 0;

#ifdef __linux__
	sg_free_table(obj->pages);
#endif
	kfree(obj->pages);
}

static int
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
{
	const struct drm_i915_gem_object_ops *ops = obj->ops;

	if (obj->pages == NULL)
		return 0;

	BUG_ON(obj->gtt_space);

	if (obj->pages_pin_count)
		return -EBUSY;

	/* ->put_pages might need to allocate memory for the bit17 swizzle
	 * array, hence protect them from being reaped by removing them from gtt
	 * lists early. */
	list_del(&obj->gtt_list);

	ops->put_pages(obj);
	obj->pages = NULL;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return 0;
}

#ifdef notyet
static long
__i915_gem_shrink(struct drm_i915_private *dev_priv, long target,
		  bool purgeable_only)
{
	struct drm_i915_gem_object *obj, *next;
	long count = 0;

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.unbound_list,
				 gtt_list) {
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
		    i915_gem_object_unbind(obj) == 0 &&
		    i915_gem_object_put_pages(obj) == 0) {
			count += obj->base.size >> PAGE_SHIFT;
			if (count >= target)
				return count;
		}
	}

	return count;
}

static long
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
{
	return __i915_gem_shrink(dev_priv, target, true);
}

static void
i915_gem_shrink_all(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj, *next;

	i915_gem_evict_everything(dev_priv->dev);

	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
		i915_gem_object_put_pages(obj);
}
#endif /* notyet */

static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
{
#ifdef __linux__
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int page_count, i;
	struct address_space *mapping;
	struct sg_table *st;
	struct scatterlist *sg;
	struct page *page;
	gfp_t gfp;
#else
	int page_count, i;
	struct vm_page **st;
	struct pglist plist;
	struct vm_page *page;
#endif

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

#ifdef __linux__
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	page_count = obj->base.size / PAGE_SIZE;
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
		sg_free_table(st);
		kfree(st);
		return -ENOMEM;
	}

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 *
	 * Fail silently without starting the shrinker
	 */
	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	gfp = mapping_gfp_mask(mapping);
	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
	gfp &= ~(__GFP_IO | __GFP_WAIT);
	for_each_sg(st->sgl, sg, page_count, i) {
		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
		if (IS_ERR(page)) {
			i915_gem_purge(dev_priv, page_count);
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
		}
		if (IS_ERR(page)) {
			/* We've tried hard to allocate the memory by reaping
			 * our own buffer, now let the real VM do its job and
			 * go down in flames if truly OOM.
			 */
			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
			gfp |= __GFP_IO | __GFP_WAIT;

			i915_gem_shrink_all(dev_priv);
			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
			if (IS_ERR(page))
				goto err_pages;

			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
			gfp &= ~(__GFP_IO | __GFP_WAIT);
		}

		sg_set_page(sg, page, PAGE_SIZE, 0);
	}

	obj->pages = st;
#else
	page_count = obj->base.size / PAGE_SIZE;
	st = malloc(page_count * sizeof(struct vm_page *), M_DRM,
	    M_WAITOK | M_CANFAIL);
	if (st == NULL)
		return -ENOMEM;

	TAILQ_INIT(&plist);
	if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist))
		goto err_pages;

	i = 0;
	TAILQ_FOREACH(page, &plist, pageq) {
		st[i] = page;
		i++;
	}
	obj->pages = st;
#endif

	if (i915_gem_object_needs_bit17_swizzle(obj))
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

#ifdef __linux__
err_pages:
	for_each_sg(st->sgl, sg, i, page_count)
		page_cache_release(sg_page(sg));
	sg_free_table(st);
	kfree(st);
	return PTR_ERR(page);
#else
err_pages:
	free(st, M_DRM, 0);
	return -ENOMEM;
#endif
}

/* Ensure that the associated pages are gathered from the backing storage
 * and pinned into our object. i915_gem_object_get_pages() may be called
 * multiple times before they are released by a single call to
 * i915_gem_object_put_pages() - once the pages are no longer referenced
 * either as a result of memory pressure (reaping pages under the shrinker)
 * or as the object is itself released.
 */
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	const struct drm_i915_gem_object_ops *ops = obj->ops;
	int ret;

	if (obj->pages)
		return 0;

	BUG_ON(obj->pages_pin_count);

	ret = ops->get_pages(obj);
	if (ret)
		return ret;

	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
	return 0;
}

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
			       struct intel_ring_buffer *ring)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 seqno = intel_ring_get_seqno(ring);

	BUG_ON(ring == NULL);
	if (obj->ring != ring && obj->last_write_seqno) {
		/* Keep the seqno relative to the current ring */
		obj->last_write_seqno = seqno;
	}
	obj->ring = ring;

	/* Add a reference if we're newly entering the active list. */
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
	}

	/* Move from whatever list we were on to the tail of execution. */
*/
2093 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
2094 list_move_tail(&obj->ring_list, &ring->active_list);
2095
2096 obj->last_read_seqno = seqno;
2097
2098 if (obj->fenced_gpu_access) {
2099 obj->last_fenced_seqno = seqno;
2100
2101 /* Bump MRU to take account of the delayed flush */
2102 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2103 struct drm_i915_fence_reg *reg;
2104
2105 reg = &dev_priv->fence_regs[obj->fence_reg];
2106 list_move_tail(&reg->lru_list,
2107 &dev_priv->mm.fence_list);
2108 }
2109 }
2110 }
2111
2112 static void
2113 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2114 {
2115 struct drm_device *dev = obj->base.dev;
2116 struct drm_i915_private *dev_priv = dev->dev_private;
2117
2118 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
2119 BUG_ON(!obj->active);
2120
2121 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2122
2123 list_del_init(&obj->ring_list);
2124 obj->ring = NULL;
2125
2126 obj->last_read_seqno = 0;
2127 obj->last_write_seqno = 0;
2128 obj->base.write_domain = 0;
2129
2130 obj->last_fenced_seqno = 0;
2131 obj->fenced_gpu_access = false;
2132
2133 obj->active = 0;
2134 drm_gem_object_unreference(&obj->base);
2135
2136 WARN_ON(i915_verify_lists(dev));
2137 }
2138
2139 static int
2140 i915_gem_handle_seqno_wrap(struct drm_device *dev)
2141 {
2142 struct drm_i915_private *dev_priv = dev->dev_private;
2143 struct intel_ring_buffer *ring;
2144 int ret, i, j;
2145
2146 /* The hardware uses various monotonic 32-bit counters, if we
2147 * detect that they will wraparound we need to idle the GPU
2148 * and reset those counters.
2149 */
2150 ret = 0;
2151 for_each_ring(ring, dev_priv, i) {
2152 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2153 ret |= ring->sync_seqno[j] != 0;
2154 }
2155 if (ret == 0)
2156 return ret;
2157
2158 ret = i915_gpu_idle(dev);
2159 if (ret)
2160 return ret;
2161
2162 i915_gem_retire_requests(dev);
2163 for_each_ring(ring, dev_priv, i) {
2164 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
2165 ring->sync_seqno[j] = 0;
2166 }
2167
2168 return 0;
2169 }
2170
2171 int
2172 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2173 {
2174 struct drm_i915_private *dev_priv = dev->dev_private;
2175
2176 /* reserve 0 for non-seqno */
2177 if (dev_priv->next_seqno == 0) {
2178 int ret = i915_gem_handle_seqno_wrap(dev);
2179 if (ret)
2180 return ret;
2181
2182 dev_priv->next_seqno = 1;
2183 }
2184
2185 *seqno = dev_priv->next_seqno++;
2186 return 0;
2187 }
2188
2189 int
2190 i915_add_request(struct intel_ring_buffer *ring,
2191 struct drm_file *file,
2192 u32 *out_seqno)
2193 {
2194 drm_i915_private_t *dev_priv = ring->dev->dev_private;
2195 struct drm_i915_gem_request *request;
2196 u32 request_ring_position;
2197 int was_empty;
2198 int ret;
2199
2200 /*
2201 * Emit any outstanding flushes - execbuf can fail to emit the flush
2202 * after having emitted the batchbuffer command. Hence we need to fix
2203 * things up similar to emitting the lazy request. The difference here
2204 * is that the flush _must_ happen before the next request, no matter
2205 * what.
2206 */
2207 ret = intel_ring_flush_all_caches(ring);
2208 if (ret)
2209 return ret;
2210
2211 request = kmalloc(sizeof(*request), GFP_KERNEL);
2212 if (request == NULL)
2213 return -ENOMEM;
2214
2215
2216 /* Record the position of the start of the request so that
2217 * should we detect the updated seqno part-way through the
2218 * GPU processing the request, we never over-estimate the
2219 * position of the head.
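 *
 * (The main consumer of this value in this file is
 *  i915_gem_retire_requests_ring(), which, once the request's seqno has
 *  passed, does roughly:
 *
 *	ring->last_retired_head = request->tail;
 *
 *  so the recorded tail can only ever under-estimate how far the GPU has
 *  actually read.)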
2220 */
2221 request_ring_position = intel_ring_get_tail(ring);
2222
2223 ret = ring->add_request(ring);
2224 if (ret) {
2225 kfree(request);
2226 return ret;
2227 }
2228
2229 request->seqno = intel_ring_get_seqno(ring);
2230 request->ring = ring;
2231 request->tail = request_ring_position;
2232 request->emitted_ticks = ticks;
2233 was_empty = list_empty(&ring->request_list);
2234 list_add_tail(&request->list, &ring->request_list);
2235 request->file_priv = NULL;
2236
2237 if (file) {
2238 struct drm_i915_file_private *file_priv = file->driver_priv;
2239
2240 mtx_enter(&file_priv->mm.lock);
2241 request->file_priv = file_priv;
2242 list_add_tail(&request->client_list,
2243 &file_priv->mm.request_list);
2244 mtx_leave(&file_priv->mm.lock);
2245 }
2246
2247 trace_i915_gem_request_add(ring, request->seqno);
2248 ring->outstanding_lazy_request = 0;
2249
2250 if (!dev_priv->mm.suspended) {
2251 if (i915_enable_hangcheck) {
2252 timeout_add_msec(&dev_priv->hangcheck_timer,
2253 DRM_I915_HANGCHECK_PERIOD);
2254 }
2255 if (was_empty) {
2256 timeout_add_sec(&dev_priv->mm.retire_timer, 1);
2257 intel_mark_busy(ring->dev);
2258 }
2259 }
2260
2261 if (out_seqno)
2262 *out_seqno = request->seqno;
2263 return 0;
2264 }
2265
2266 static inline void
2267 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2268 {
2269 struct drm_i915_file_private *file_priv = request->file_priv;
2270
2271 if (!file_priv)
2272 return;
2273
2274 mtx_enter(&file_priv->mm.lock);
2275 if (request->file_priv) {
2276 list_del(&request->client_list);
2277 request->file_priv = NULL;
2278 }
2279 mtx_leave(&file_priv->mm.lock);
2280 }
2281
2282 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2283 struct intel_ring_buffer *ring)
2284 {
2285 while (!list_empty(&ring->request_list)) {
2286 struct drm_i915_gem_request *request;
2287
2288 request = list_first_entry(&ring->request_list,
2289 struct drm_i915_gem_request,
2290 list);
2291
2292 list_del(&request->list);
2293 i915_gem_request_remove_from_client(request);
2294 kfree(request);
2295 }
2296
2297 while (!list_empty(&ring->active_list)) {
2298 struct drm_i915_gem_object *obj;
2299
2300 obj = list_first_entry(&ring->active_list,
2301 struct drm_i915_gem_object,
2302 ring_list);
2303
2304 i915_gem_object_move_to_inactive(obj);
2305 }
2306 }
2307
2308 static void i915_gem_reset_fences(struct drm_device *dev)
2309 {
2310 struct drm_i915_private *dev_priv = dev->dev_private;
2311 int i;
2312
2313 for (i = 0; i < dev_priv->num_fence_regs; i++) {
2314 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2315
2316 i915_gem_write_fence(dev, i, NULL);
2317
2318 if (reg->obj)
2319 i915_gem_object_fence_lost(reg->obj);
2320
2321 reg->pin_count = 0;
2322 reg->obj = NULL;
2323 INIT_LIST_HEAD(&reg->lru_list);
2324 }
2325
2326 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2327 }
2328
2329 void i915_gem_reset(struct drm_device *dev)
2330 {
2331 struct drm_i915_private *dev_priv = dev->dev_private;
2332 struct drm_i915_gem_object *obj;
2333 struct intel_ring_buffer *ring;
2334 int i;
2335
2336 for_each_ring(ring, dev_priv, i)
2337 i915_gem_reset_ring_lists(dev_priv, ring);
2338
2339 /* Move everything out of the GPU domains to ensure we do any
2340 * necessary invalidation upon reuse.
2341 */ 2342 list_for_each_entry(obj, 2343 &dev_priv->mm.inactive_list, 2344 mm_list) 2345 { 2346 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2347 } 2348 2349 /* The fence registers are invalidated so clear them out */ 2350 i915_gem_reset_fences(dev); 2351 } 2352 2353 /** 2354 * This function clears the request list as sequence numbers are passed. 2355 */ 2356 void 2357 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 2358 { 2359 uint32_t seqno; 2360 2361 if (list_empty(&ring->request_list)) 2362 return; 2363 2364 WARN_ON(i915_verify_lists(ring->dev)); 2365 2366 seqno = ring->get_seqno(ring, true); 2367 2368 while (!list_empty(&ring->request_list)) { 2369 struct drm_i915_gem_request *request; 2370 2371 request = list_first_entry(&ring->request_list, 2372 struct drm_i915_gem_request, 2373 list); 2374 2375 if (!i915_seqno_passed(seqno, request->seqno)) 2376 break; 2377 2378 trace_i915_gem_request_retire(ring, request->seqno); 2379 /* We know the GPU must have read the request to have 2380 * sent us the seqno + interrupt, so use the position 2381 * of tail of the request to update the last known position 2382 * of the GPU head. 2383 */ 2384 ring->last_retired_head = request->tail; 2385 2386 list_del(&request->list); 2387 i915_gem_request_remove_from_client(request); 2388 kfree(request); 2389 } 2390 2391 /* Move any buffers on the active list that are no longer referenced 2392 * by the ringbuffer to the flushing/inactive lists as appropriate. 2393 */ 2394 while (!list_empty(&ring->active_list)) { 2395 struct drm_i915_gem_object *obj; 2396 2397 obj = list_first_entry(&ring->active_list, 2398 struct drm_i915_gem_object, 2399 ring_list); 2400 2401 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 2402 break; 2403 2404 i915_gem_object_move_to_inactive(obj); 2405 } 2406 2407 if (unlikely(ring->trace_irq_seqno && 2408 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 2409 ring->irq_put(ring); 2410 ring->trace_irq_seqno = 0; 2411 } 2412 2413 WARN_ON(i915_verify_lists(ring->dev)); 2414 } 2415 2416 void 2417 i915_gem_retire_requests(struct drm_device *dev) 2418 { 2419 drm_i915_private_t *dev_priv = dev->dev_private; 2420 struct intel_ring_buffer *ring; 2421 int i; 2422 2423 for_each_ring(ring, dev_priv, i) 2424 i915_gem_retire_requests_ring(ring); 2425 } 2426 2427 void 2428 i915_gem_retire_work_handler(void *arg1, void *unused) 2429 { 2430 drm_i915_private_t *dev_priv = arg1; 2431 struct drm_device *dev; 2432 struct intel_ring_buffer *ring; 2433 bool idle; 2434 int i; 2435 2436 dev = (struct drm_device *)dev_priv->drmdev; 2437 2438 /* Come back later if the device is busy... */ 2439 if (rw_enter(&dev->dev_lock, RW_NOSLEEP | RW_WRITE)) { 2440 timeout_add_sec(&dev_priv->mm.retire_timer, 1); 2441 return; 2442 } 2443 2444 i915_gem_retire_requests(dev); 2445 2446 /* Send a periodic flush down the ring so we don't hold onto GEM 2447 * objects indefinitely. 2448 */ 2449 idle = true; 2450 for_each_ring(ring, dev_priv, i) { 2451 if (ring->gpu_caches_dirty) 2452 i915_add_request(ring, NULL, NULL); 2453 2454 idle &= list_empty(&ring->request_list); 2455 } 2456 2457 if (!dev_priv->mm.suspended && !idle) 2458 timeout_add_sec(&dev_priv->mm.retire_timer, 1); 2459 if (idle) 2460 intel_mark_idle(dev); 2461 2462 DRM_UNLOCK(); 2463 } 2464 2465 /** 2466 * Ensures that an object will eventually get non-busy by flushing any required 2467 * write domains, emitting any outstanding lazy request and retiring and 2468 * completed requests. 
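 *
 * (Both callers in this file follow the same shape, roughly:
 *
 *	ret = i915_gem_object_flush_active(obj);
 *	if (ret == 0 && obj->active)
 *		... wait on / report obj->last_read_seqno ...
 *
 *  see i915_gem_wait_ioctl() and i915_gem_busy_ioctl() below.)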
2469 */
2470 static int
2471 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2472 {
2473 int ret;
2474
2475 if (obj->active) {
2476 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2477 if (ret)
2478 return ret;
2479
2480 i915_gem_retire_requests_ring(obj->ring);
2481 }
2482
2483 return 0;
2484 }
2485
2486 /**
2487 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2488 * @DRM_IOCTL_ARGS: standard ioctl arguments
2489 *
2490 * Returns 0 if successful, else an error is returned with the remaining time in
2491 * the timeout parameter.
2492 * -ETIME: object is still busy after timeout
2493 * -ERESTARTSYS: signal interrupted the wait
2494 * -ENOENT: object doesn't exist
2495 * Also possible, but rare:
2496 * -EAGAIN: GPU wedged
2497 * -ENOMEM: damn
2498 * -ENODEV: Internal IRQ fail
2499 * -E?: The add request failed
2500 *
2501 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2502 * non-zero timeout parameter the wait ioctl will wait for the given number of
2503 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2504 * without holding struct_mutex the object may become re-busied before this
2505 * function completes. A similar but shorter race condition exists in the busy
2506 * ioctl.
2507 */
2508 int
2509 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2510 {
2511 struct drm_i915_gem_wait *args = data;
2512 struct drm_i915_gem_object *obj;
2513 struct intel_ring_buffer *ring = NULL;
2514 struct timespec timeout_stack, *timeout = NULL;
2515 u32 seqno = 0;
2516 int ret = 0;
2517
2518 if (args->timeout_ns >= 0) {
2519 timeout_stack = ns_to_timespec(args->timeout_ns);
2520 timeout = &timeout_stack;
2521 }
2522
2523 ret = i915_mutex_lock_interruptible(dev);
2524 if (ret)
2525 return ret;
2526
2527 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2528 if (&obj->base == NULL) {
2529 DRM_UNLOCK();
2530 return -ENOENT;
2531 }
2532
2533 /* Need to make sure the object gets inactive eventually. */
2534 ret = i915_gem_object_flush_active(obj);
2535 if (ret)
2536 goto out;
2537
2538 if (obj->active) {
2539 seqno = obj->last_read_seqno;
2540 ring = obj->ring;
2541 }
2542
2543 if (seqno == 0)
2544 goto out;
2545
2546 /* Do this after OLR check to make sure we make forward progress polling
2547 * on this IOCTL with a 0 timeout (like busy ioctl)
2548 */
2549 if (!args->timeout_ns) {
2550 ret = -ETIMEDOUT;
2551 goto out;
2552 }
2553
2554 drm_gem_object_unreference(&obj->base);
2555 DRM_UNLOCK();
2556
2557 ret = __wait_seqno(ring, seqno, true, timeout);
2558 if (timeout) {
2559 WARN_ON(!timespec_valid(timeout));
2560 args->timeout_ns = timespec_to_ns(timeout);
2561 }
2562 return ret;
2563
2564 out:
2565 drm_gem_object_unreference(&obj->base);
2566 DRM_UNLOCK();
2567 return ret;
2568 }
2569
2570 /**
2571 * i915_gem_object_sync - sync an object to a ring.
2572 *
2573 * @obj: object which may be in use on another ring.
2574 * @to: ring we wish to use the object on. May be NULL.
2575 *
2576 * This code is meant to abstract object synchronization with the GPU.
2577 * Calling with NULL implies synchronizing the object with the CPU
2578 * rather than a particular GPU ring.
2579 *
2580 * Returns 0 if successful, else propagates up the lower layer error.
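 *
 * (Typical use, as in i915_gem_object_pin_to_display_plane() below:
 *
 *	if (pipelined != obj->ring) {
 *		ret = i915_gem_object_sync(obj, pipelined);
 *		if (ret)
 *			return ret;
 *	}
 *
 *  With semaphores enabled this queues a ring-to-ring wait via sync_to();
 *  otherwise it degrades to a CPU-side wait through
 *  i915_gem_object_wait_rendering().)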
2581 */ 2582 int 2583 i915_gem_object_sync(struct drm_i915_gem_object *obj, 2584 struct intel_ring_buffer *to) 2585 { 2586 struct intel_ring_buffer *from = obj->ring; 2587 u32 seqno; 2588 int ret, idx; 2589 2590 if (from == NULL || to == from) 2591 return 0; 2592 2593 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 2594 return i915_gem_object_wait_rendering(obj, false); 2595 2596 idx = intel_ring_sync_index(from, to); 2597 2598 seqno = obj->last_read_seqno; 2599 if (seqno <= from->sync_seqno[idx]) 2600 return 0; 2601 2602 ret = i915_gem_check_olr(obj->ring, seqno); 2603 if (ret) 2604 return ret; 2605 2606 ret = to->sync_to(to, from, seqno); 2607 if (!ret) 2608 /* We use last_read_seqno because sync_to() 2609 * might have just caused seqno wrap under 2610 * the radar. 2611 */ 2612 from->sync_seqno[idx] = obj->last_read_seqno; 2613 2614 return ret; 2615 } 2616 2617 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2618 { 2619 u32 old_write_domain, old_read_domains; 2620 2621 /* Act a barrier for all accesses through the GTT */ 2622 DRM_MEMORYBARRIER(); 2623 2624 /* Force a pagefault for domain tracking on next user access */ 2625 i915_gem_release_mmap(obj); 2626 2627 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 2628 return; 2629 2630 old_read_domains = obj->base.read_domains; 2631 old_write_domain = obj->base.write_domain; 2632 2633 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 2634 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 2635 2636 trace_i915_gem_object_change_domain(obj, 2637 old_read_domains, 2638 old_write_domain); 2639 } 2640 2641 /** 2642 * Unbinds an object from the GTT aperture. 2643 */ 2644 int 2645 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 2646 { 2647 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2648 int ret = 0; 2649 2650 if (obj->gtt_space == NULL) 2651 return 0; 2652 2653 if (obj->pin_count) 2654 return -EBUSY; 2655 2656 BUG_ON(obj->pages == NULL); 2657 2658 ret = i915_gem_object_finish_gpu(obj); 2659 if (ret) 2660 return ret; 2661 /* Continue on if we fail due to EIO, the GPU is hung so we 2662 * should be safe and we need to cleanup or else we might 2663 * cause memory corruption through use-after-free. 2664 */ 2665 2666 i915_gem_object_finish_gtt(obj); 2667 2668 /* release the fence reg _after_ flushing */ 2669 ret = i915_gem_object_put_fence(obj); 2670 if (ret) 2671 return ret; 2672 2673 trace_i915_gem_object_unbind(obj); 2674 2675 if (obj->has_global_gtt_mapping) 2676 i915_gem_gtt_unbind_object(obj); 2677 #ifdef notyet 2678 if (obj->has_aliasing_ppgtt_mapping) { 2679 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2680 obj->has_aliasing_ppgtt_mapping = 0; 2681 } 2682 #endif 2683 i915_gem_gtt_finish_object(obj); 2684 2685 list_del(&obj->mm_list); 2686 list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list); 2687 /* Avoid an unnecessary call to unbind on rebind. */ 2688 obj->map_and_fenceable = true; 2689 2690 drm_mm_put_block(obj->gtt_space); 2691 obj->gtt_space = NULL; 2692 obj->gtt_offset = 0; 2693 2694 /* XXX Until we've hooked up the shrinking functions. */ 2695 i915_gem_object_put_pages(obj); 2696 2697 return 0; 2698 } 2699 2700 int i915_gpu_idle(struct drm_device *dev) 2701 { 2702 drm_i915_private_t *dev_priv = dev->dev_private; 2703 struct intel_ring_buffer *ring; 2704 int ret, i; 2705 2706 /* Flush everything onto the inactive list. 
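 *
 * (Callers pair this with a retire pass so the bookkeeping catches up,
 *  e.g. i915_gem_handle_seqno_wrap() and i915_gem_idle() both do:
 *
 *	ret = i915_gpu_idle(dev);
 *	if (ret)
 *		return ret;
 *	i915_gem_retire_requests(dev);
 * )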
*/ 2707 for_each_ring(ring, dev_priv, i) { 2708 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 2709 if (ret) 2710 return ret; 2711 2712 ret = intel_ring_idle(ring); 2713 if (ret) 2714 return ret; 2715 } 2716 2717 return 0; 2718 } 2719 2720 static void i965_write_fence_reg(struct drm_device *dev, int reg, 2721 struct drm_i915_gem_object *obj) 2722 { 2723 drm_i915_private_t *dev_priv = dev->dev_private; 2724 int fence_reg; 2725 int fence_pitch_shift; 2726 2727 if (INTEL_INFO(dev)->gen >= 6) { 2728 fence_reg = FENCE_REG_SANDYBRIDGE_0; 2729 fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; 2730 } else { 2731 fence_reg = FENCE_REG_965_0; 2732 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 2733 } 2734 2735 fence_reg += reg * 8; 2736 2737 /* To w/a incoherency with non-atomic 64-bit register updates, 2738 * we split the 64-bit update into two 32-bit writes. In order 2739 * for a partial fence not to be evaluated between writes, we 2740 * precede the update with write to turn off the fence register, 2741 * and only enable the fence as the last step. 2742 * 2743 * For extra levels of paranoia, we make sure each step lands 2744 * before applying the next step. 2745 */ 2746 I915_WRITE(fence_reg, 0); 2747 POSTING_READ(fence_reg); 2748 2749 if (obj) { 2750 u32 size = obj->gtt_space->size; 2751 uint64_t val; 2752 2753 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2754 0xfffff000) << 32; 2755 val |= obj->gtt_offset & 0xfffff000; 2756 val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; 2757 if (obj->tiling_mode == I915_TILING_Y) 2758 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2759 val |= I965_FENCE_REG_VALID; 2760 2761 I915_WRITE(fence_reg + 4, val >> 32); 2762 POSTING_READ(fence_reg + 4); 2763 2764 I915_WRITE(fence_reg + 0, val); 2765 POSTING_READ(fence_reg); 2766 } else { 2767 I915_WRITE(fence_reg + 4, 0); 2768 POSTING_READ(fence_reg + 4); 2769 } 2770 } 2771 2772 static void i915_write_fence_reg(struct drm_device *dev, int reg, 2773 struct drm_i915_gem_object *obj) 2774 { 2775 drm_i915_private_t *dev_priv = dev->dev_private; 2776 u32 val; 2777 2778 if (obj) { 2779 u32 size = obj->gtt_space->size; 2780 int pitch_val; 2781 int tile_width; 2782 2783 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 2784 (size & -size) != size || 2785 (obj->gtt_offset & (size - 1)), 2786 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 2787 obj->gtt_offset, obj->map_and_fenceable, size); 2788 2789 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2790 tile_width = 128; 2791 else 2792 tile_width = 512; 2793 2794 /* Note: pitch better be a power of two tile widths */ 2795 pitch_val = obj->stride / tile_width; 2796 pitch_val = ffs(pitch_val) - 1; 2797 2798 val = obj->gtt_offset; 2799 if (obj->tiling_mode == I915_TILING_Y) 2800 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2801 val |= I915_FENCE_SIZE_BITS(size); 2802 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2803 val |= I830_FENCE_REG_VALID; 2804 } else 2805 val = 0; 2806 2807 if (reg < 8) 2808 reg = FENCE_REG_830_0 + reg * 4; 2809 else 2810 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2811 2812 I915_WRITE(reg, val); 2813 POSTING_READ(reg); 2814 } 2815 2816 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2817 struct drm_i915_gem_object *obj) 2818 { 2819 drm_i915_private_t *dev_priv = dev->dev_private; 2820 uint32_t val; 2821 2822 if (obj) { 2823 u32 size = obj->gtt_space->size; 2824 uint32_t pitch_val; 2825 2826 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2827 (size & -size) != size || 2828 (obj->gtt_offset & (size - 1)), 2829 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2830 obj->gtt_offset, size); 2831 2832 pitch_val = obj->stride / 128; 2833 pitch_val = ffs(pitch_val) - 1; 2834 2835 val = obj->gtt_offset; 2836 if (obj->tiling_mode == I915_TILING_Y) 2837 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2838 val |= I830_FENCE_SIZE_BITS(size); 2839 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2840 val |= I830_FENCE_REG_VALID; 2841 } else 2842 val = 0; 2843 2844 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2845 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2846 } 2847 2848 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2849 struct drm_i915_gem_object *obj) 2850 { 2851 switch (INTEL_INFO(dev)->gen) { 2852 case 7: 2853 case 6: 2854 case 5: 2855 case 4: i965_write_fence_reg(dev, reg, obj); break; 2856 case 3: i915_write_fence_reg(dev, reg, obj); break; 2857 case 2: i830_write_fence_reg(dev, reg, obj); break; 2858 default: break; 2859 } 2860 } 2861 2862 static inline int fence_number(struct drm_i915_private *dev_priv, 2863 struct drm_i915_fence_reg *fence) 2864 { 2865 return fence - dev_priv->fence_regs; 2866 } 2867 2868 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2869 struct drm_i915_fence_reg *fence, 2870 bool enable) 2871 { 2872 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2873 int reg = fence_number(dev_priv, fence); 2874 2875 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 2876 2877 if (enable) { 2878 obj->fence_reg = reg; 2879 fence->obj = obj; 2880 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2881 } else { 2882 obj->fence_reg = I915_FENCE_REG_NONE; 2883 fence->obj = NULL; 2884 list_del_init(&fence->lru_list); 2885 } 2886 } 2887 2888 static int 2889 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 2890 { 2891 if (obj->last_fenced_seqno) { 2892 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2893 if (ret) 2894 return ret; 2895 2896 obj->last_fenced_seqno = 0; 2897 } 2898 2899 /* Ensure that all CPU reads are completed before installing a fence 2900 * and all writes before removing the fence. 
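 *
 * (In other words the intended ordering, sketched for the install case, is:
 *
 *	... CPU accesses through the GTT mapping ...
 *	DRM_WRITEMEMORYBARRIER();
 *	i915_gem_object_update_fence(obj, ...);	// ends in I915_WRITE(fence_reg)
 *
 *  The barrier below is only needed while the object is in the GTT read
 *  domain, since fences only affect accesses made through the aperture.)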
2901 */ 2902 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2903 DRM_WRITEMEMORYBARRIER(); 2904 2905 obj->fenced_gpu_access = false; 2906 return 0; 2907 } 2908 2909 int 2910 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2911 { 2912 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2913 int ret; 2914 2915 ret = i915_gem_object_flush_fence(obj); 2916 if (ret) 2917 return ret; 2918 2919 if (obj->fence_reg == I915_FENCE_REG_NONE) 2920 return 0; 2921 2922 i915_gem_object_update_fence(obj, 2923 &dev_priv->fence_regs[obj->fence_reg], 2924 false); 2925 i915_gem_object_fence_lost(obj); 2926 2927 return 0; 2928 } 2929 2930 static struct drm_i915_fence_reg * 2931 i915_find_fence_reg(struct drm_device *dev) 2932 { 2933 struct drm_i915_private *dev_priv = dev->dev_private; 2934 struct drm_i915_fence_reg *reg, *avail; 2935 int i; 2936 2937 /* First try to find a free reg */ 2938 avail = NULL; 2939 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2940 reg = &dev_priv->fence_regs[i]; 2941 if (!reg->obj) 2942 return reg; 2943 2944 if (!reg->pin_count) 2945 avail = reg; 2946 } 2947 2948 if (avail == NULL) 2949 return NULL; 2950 2951 /* None available, try to steal one or wait for a user to finish */ 2952 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2953 if (reg->pin_count) 2954 continue; 2955 2956 return reg; 2957 } 2958 2959 return NULL; 2960 } 2961 2962 /** 2963 * i915_gem_object_get_fence - set up fencing for an object 2964 * @obj: object to map through a fence reg 2965 * 2966 * When mapping objects through the GTT, userspace wants to be able to write 2967 * to them without having to worry about swizzling if the object is tiled. 2968 * This function walks the fence regs looking for a free one for @obj, 2969 * stealing one if it can't find any. 2970 * 2971 * It then sets up the reg based on the object's properties: address, pitch 2972 * and tiling format. 2973 * 2974 * For an untiled surface, this removes any existing fence. 2975 */ 2976 int 2977 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 2978 { 2979 struct drm_device *dev = obj->base.dev; 2980 struct drm_i915_private *dev_priv = dev->dev_private; 2981 bool enable = obj->tiling_mode != I915_TILING_NONE; 2982 struct drm_i915_fence_reg *reg; 2983 int ret; 2984 2985 /* Have we updated the tiling parameters upon the object and so 2986 * will need to serialise the write to the associated fence register? 2987 */ 2988 if (obj->fence_dirty) { 2989 ret = i915_gem_object_flush_fence(obj); 2990 if (ret) 2991 return ret; 2992 } 2993 2994 /* Just update our place in the LRU if our fence is getting reused. 
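 *
 * (Keeping the LRU fresh matters because, when every register is taken,
 *  i915_find_fence_reg() above steals the least recently used unpinned
 *  entry:
 *
 *	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list)
 *		if (!reg->pin_count)
 *			return reg;
 *
 *  so frequently used fences are the last to be stolen.)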
*/
2995 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2996 reg = &dev_priv->fence_regs[obj->fence_reg];
2997 if (!obj->fence_dirty) {
2998 list_move_tail(&reg->lru_list,
2999 &dev_priv->mm.fence_list);
3000 return 0;
3001 }
3002 } else if (enable) {
3003 reg = i915_find_fence_reg(dev);
3004 if (reg == NULL)
3005 return -EDEADLK;
3006
3007 if (reg->obj) {
3008 struct drm_i915_gem_object *old = reg->obj;
3009
3010 ret = i915_gem_object_flush_fence(old);
3011 if (ret)
3012 return ret;
3013
3014 i915_gem_object_fence_lost(old);
3015 }
3016 } else
3017 return 0;
3018
3019 i915_gem_object_update_fence(obj, reg, enable);
3020 obj->fence_dirty = false;
3021
3022 return 0;
3023 }
3024
3025 static bool i915_gem_valid_gtt_space(struct drm_device *dev,
3026 struct drm_mm_node *gtt_space,
3027 unsigned long cache_level)
3028 {
3029 struct drm_mm_node *other;
3030
3031 /* On non-LLC machines we have to be careful when putting differing
3032 * types of snoopable memory together to avoid the prefetcher
3033 * crossing memory domains and dying.
3034 */
3035 if (HAS_LLC(dev))
3036 return true;
3037
3038 if (gtt_space == NULL)
3039 return true;
3040
3041 if (list_empty(&gtt_space->node_list))
3042 return true;
3043
3044 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3045 if (other->allocated && !other->hole_follows && other->color != cache_level)
3046 return false;
3047
3048 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3049 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3050 return false;
3051
3052 return true;
3053 }
3054
3055 static void i915_gem_verify_gtt(struct drm_device *dev)
3056 {
3057 }
3058
3059 /**
3060 * Finds free space in the GTT aperture and binds the object there.
3061 */
3062 static int
3063 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
3064 unsigned alignment,
3065 bool map_and_fenceable,
3066 bool nonblocking)
3067 {
3068 struct drm_device *dev = obj->base.dev;
3069 drm_i915_private_t *dev_priv = dev->dev_private;
3070 struct drm_mm_node *node;
3071 u32 size, fence_size, fence_alignment, unfenced_alignment;
3072 bool mappable, fenceable;
3073 int ret;
3074
3075 if (obj->madv != I915_MADV_WILLNEED) {
3076 DRM_ERROR("Attempting to bind a purgeable object\n");
3077 return -EINVAL;
3078 }
3079
3080 fence_size = i915_gem_get_gtt_size(dev,
3081 obj->base.size,
3082 obj->tiling_mode);
3083 fence_alignment = i915_gem_get_gtt_alignment(dev,
3084 obj->base.size,
3085 obj->tiling_mode);
3086 unfenced_alignment =
3087 i915_gem_get_unfenced_gtt_alignment(dev,
3088 obj->base.size,
3089 obj->tiling_mode);
3090
3091 if (alignment == 0)
3092 alignment = map_and_fenceable ? fence_alignment :
3093 unfenced_alignment;
3094 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
3095 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
3096 return -EINVAL;
3097 }
3098
3099 size = map_and_fenceable ? fence_size : obj->base.size;
3100
3101 /* If the object is bigger than the entire aperture, reject it early
3102 * before evicting everything in a vain attempt to find space.
3103 */
3104 if (obj->base.size >
3105 (map_and_fenceable ?
dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 3106 DRM_ERROR("Attempting to bind an object larger than the aperture\n"); 3107 return -E2BIG; 3108 } 3109 3110 ret = i915_gem_object_get_pages(obj); 3111 if (ret) 3112 return ret; 3113 3114 i915_gem_object_pin_pages(obj); 3115 3116 node = kzalloc(sizeof(*node), GFP_KERNEL); 3117 if (node == NULL) { 3118 i915_gem_object_unpin_pages(obj); 3119 /* XXX Until we've hooked up the shrinking functions. */ 3120 i915_gem_object_put_pages(obj); 3121 return -ENOMEM; 3122 } 3123 3124 search_free: 3125 if (map_and_fenceable) 3126 ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, 3127 size, alignment, obj->cache_level, 3128 0, dev_priv->mm.gtt_mappable_end); 3129 else 3130 ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, 3131 size, alignment, obj->cache_level); 3132 if (ret) { 3133 ret = i915_gem_evict_something(dev, size, alignment, 3134 obj->cache_level, 3135 map_and_fenceable, 3136 nonblocking); 3137 if (ret == 0) 3138 goto search_free; 3139 3140 i915_gem_object_unpin_pages(obj); 3141 /* XXX Until we've hooked up the shrinking functions. */ 3142 i915_gem_object_put_pages(obj); 3143 kfree(node); 3144 return ret; 3145 } 3146 if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { 3147 i915_gem_object_unpin_pages(obj); 3148 /* XXX Until we've hooked up the shrinking functions. */ 3149 i915_gem_object_put_pages(obj); 3150 drm_mm_put_block(node); 3151 return -EINVAL; 3152 } 3153 3154 ret = i915_gem_gtt_prepare_object(obj); 3155 if (ret) { 3156 i915_gem_object_unpin_pages(obj); 3157 /* XXX Until we've hooked up the shrinking functions. */ 3158 i915_gem_object_put_pages(obj); 3159 drm_mm_put_block(node); 3160 return ret; 3161 } 3162 3163 list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 3164 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3165 3166 obj->gtt_space = node; 3167 obj->gtt_offset = node->start; 3168 3169 fenceable = 3170 node->size == fence_size && 3171 (node->start & (fence_alignment - 1)) == 0; 3172 3173 mappable = 3174 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 3175 3176 obj->map_and_fenceable = mappable && fenceable; 3177 3178 i915_gem_object_unpin_pages(obj); 3179 trace_i915_gem_object_bind(obj, map_and_fenceable); 3180 i915_gem_verify_gtt(dev); 3181 return 0; 3182 } 3183 3184 void 3185 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 3186 { 3187 /* If we don't have a page list set up, then we're not pinned 3188 * to GPU, and we can ignore the cache flush because it'll happen 3189 * again at bind time. 3190 */ 3191 if (obj->pages == NULL) 3192 return; 3193 3194 /* If the GPU is snooping the contents of the CPU cache, 3195 * we do not need to manually clear the CPU cache lines. However, 3196 * the caches are only snooped when the render cache is 3197 * flushed/invalidated. As we always have to emit invalidations 3198 * and flushes when moving into and out of the RENDER domain, correct 3199 * snooping behaviour occurs naturally as the result of our domain 3200 * tracking. 3201 */ 3202 if (obj->cache_level != I915_CACHE_NONE) 3203 return; 3204 3205 #if 0 3206 trace_i915_gem_object_clflush(obj); 3207 3208 drm_clflush_sg(obj->pages); 3209 #else 3210 { 3211 int page_count = obj->base.size >> PAGE_SHIFT; 3212 int i; 3213 3214 for (i = 0; i < page_count; i++) 3215 pmap_flush_page(VM_PAGE_TO_PHYS(obj->pages[i])); 3216 } 3217 #endif 3218 } 3219 3220 /** Flushes the GTT write domain for the object if it's dirty. 
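 *
 * (The two flushers below are used symmetrically by the domain moves,
 *  i.e. roughly:
 *
 *	set_to_gtt_domain:  i915_gem_object_flush_cpu_write_domain(obj);
 *	set_to_cpu_domain:  i915_gem_object_flush_gtt_write_domain(obj);
 *
 *  whichever domain is being left has its dirty data pushed out first.)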
*/ 3221 static void 3222 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 3223 { 3224 uint32_t old_write_domain; 3225 3226 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 3227 return; 3228 3229 /* No actual flushing is required for the GTT write domain. Writes 3230 * to it immediately go to main memory as far as we know, so there's 3231 * no chipset flush. It also doesn't land in render cache. 3232 * 3233 * However, we do have to enforce the order so that all writes through 3234 * the GTT land before any writes to the device, such as updates to 3235 * the GATT itself. 3236 */ 3237 DRM_WRITEMEMORYBARRIER(); 3238 3239 old_write_domain = obj->base.write_domain; 3240 obj->base.write_domain = 0; 3241 3242 trace_i915_gem_object_change_domain(obj, 3243 obj->base.read_domains, 3244 old_write_domain); 3245 } 3246 3247 /** Flushes the CPU write domain for the object if it's dirty. */ 3248 static void 3249 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 3250 { 3251 uint32_t old_write_domain; 3252 3253 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 3254 return; 3255 3256 i915_gem_clflush_object(obj); 3257 i915_gem_chipset_flush(obj->base.dev); 3258 old_write_domain = obj->base.write_domain; 3259 obj->base.write_domain = 0; 3260 3261 trace_i915_gem_object_change_domain(obj, 3262 obj->base.read_domains, 3263 old_write_domain); 3264 } 3265 3266 /** 3267 * Moves a single object to the GTT read, and possibly write domain. 3268 * 3269 * This function returns when the move is complete, including waiting on 3270 * flushes to occur. 3271 */ 3272 int 3273 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 3274 { 3275 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 3276 uint32_t old_write_domain, old_read_domains; 3277 int ret; 3278 3279 /* Not valid to be called on unbound objects. */ 3280 if (obj->gtt_space == NULL) 3281 return -EINVAL; 3282 3283 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 3284 return 0; 3285 3286 ret = i915_gem_object_wait_rendering(obj, !write); 3287 if (ret) 3288 return ret; 3289 3290 i915_gem_object_flush_cpu_write_domain(obj); 3291 3292 old_write_domain = obj->base.write_domain; 3293 old_read_domains = obj->base.read_domains; 3294 3295 /* It should now be out of any other write domains, and we can update 3296 * the domain values for our changes. 
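 *
 * (For reference, a caller that intends to write through the aperture uses
 *  the write flag, roughly:
 *
 *	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 *	if (ret)
 *		return ret;
 *	... CPU writes via the GTT mapping ...
 *
 *  With write set, the read domains collapse to GTT only and obj->dirty is
 *  raised so the backing pages are treated as modified from here on.)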
3297 */ 3298 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 3299 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3300 if (write) { 3301 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 3302 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 3303 obj->dirty = 1; 3304 } 3305 3306 trace_i915_gem_object_change_domain(obj, 3307 old_read_domains, 3308 old_write_domain); 3309 3310 /* And bump the LRU for this access */ 3311 if (i915_gem_object_is_inactive(obj)) 3312 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3313 3314 return 0; 3315 } 3316 3317 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 3318 enum i915_cache_level cache_level) 3319 { 3320 struct drm_device *dev = obj->base.dev; 3321 #ifdef notyet 3322 drm_i915_private_t *dev_priv = dev->dev_private; 3323 #endif 3324 int ret; 3325 3326 if (obj->cache_level == cache_level) 3327 return 0; 3328 3329 if (obj->pin_count) { 3330 DRM_DEBUG("can not change the cache level of pinned objects\n"); 3331 return -EBUSY; 3332 } 3333 3334 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 3335 ret = i915_gem_object_unbind(obj); 3336 if (ret) 3337 return ret; 3338 } 3339 3340 if (obj->gtt_space) { 3341 ret = i915_gem_object_finish_gpu(obj); 3342 if (ret) 3343 return ret; 3344 3345 i915_gem_object_finish_gtt(obj); 3346 3347 /* Before SandyBridge, you could not use tiling or fence 3348 * registers with snooped memory, so relinquish any fences 3349 * currently pointing to our region in the aperture. 3350 */ 3351 if (INTEL_INFO(dev)->gen < 6) { 3352 ret = i915_gem_object_put_fence(obj); 3353 if (ret) 3354 return ret; 3355 } 3356 3357 if (obj->has_global_gtt_mapping) 3358 i915_gem_gtt_bind_object(obj, cache_level); 3359 #ifdef notyet 3360 if (obj->has_aliasing_ppgtt_mapping) 3361 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 3362 obj, cache_level); 3363 #endif 3364 3365 obj->gtt_space->color = cache_level; 3366 } 3367 3368 if (cache_level == I915_CACHE_NONE) { 3369 u32 old_read_domains, old_write_domain; 3370 3371 /* If we're coming from LLC cached, then we haven't 3372 * actually been tracking whether the data is in the 3373 * CPU cache or not, since we only allow one bit set 3374 * in obj->write_domain and have been skipping the clflushes. 3375 * Just set it to the CPU cache for now. 
3376 */ 3377 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 3378 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 3379 3380 old_read_domains = obj->base.read_domains; 3381 old_write_domain = obj->base.write_domain; 3382 3383 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3384 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3385 3386 trace_i915_gem_object_change_domain(obj, 3387 old_read_domains, 3388 old_write_domain); 3389 } 3390 3391 obj->cache_level = cache_level; 3392 i915_gem_verify_gtt(dev); 3393 return 0; 3394 } 3395 3396 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 3397 struct drm_file *file) 3398 { 3399 struct drm_i915_gem_caching *args = data; 3400 struct drm_i915_gem_object *obj; 3401 int ret; 3402 3403 ret = i915_mutex_lock_interruptible(dev); 3404 if (ret) 3405 return ret; 3406 3407 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3408 if (&obj->base == NULL) { 3409 ret = -ENOENT; 3410 goto unlock; 3411 } 3412 3413 args->caching = obj->cache_level != I915_CACHE_NONE; 3414 3415 drm_gem_object_unreference(&obj->base); 3416 unlock: 3417 DRM_UNLOCK(); 3418 return ret; 3419 } 3420 3421 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 3422 struct drm_file *file) 3423 { 3424 struct drm_i915_gem_caching *args = data; 3425 struct drm_i915_gem_object *obj; 3426 enum i915_cache_level level; 3427 int ret; 3428 3429 switch (args->caching) { 3430 case I915_CACHING_NONE: 3431 level = I915_CACHE_NONE; 3432 break; 3433 case I915_CACHING_CACHED: 3434 level = I915_CACHE_LLC; 3435 break; 3436 default: 3437 return -EINVAL; 3438 } 3439 3440 ret = i915_mutex_lock_interruptible(dev); 3441 if (ret) 3442 return ret; 3443 3444 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3445 if (&obj->base == NULL) { 3446 ret = -ENOENT; 3447 goto unlock; 3448 } 3449 3450 ret = i915_gem_object_set_cache_level(obj, level); 3451 3452 drm_gem_object_unreference(&obj->base); 3453 unlock: 3454 DRM_UNLOCK(); 3455 return ret; 3456 } 3457 3458 /* 3459 * Prepare buffer for display plane (scanout, cursors, etc). 3460 * Can be called from an uninterruptible phase (modesetting) and allows 3461 * any flushes to be pipelined (for pageflips). 3462 */ 3463 int 3464 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 3465 u32 alignment, 3466 struct intel_ring_buffer *pipelined) 3467 { 3468 u32 old_read_domains, old_write_domain; 3469 int ret; 3470 3471 if (pipelined != obj->ring) { 3472 ret = i915_gem_object_sync(obj, pipelined); 3473 if (ret) 3474 return ret; 3475 } 3476 3477 /* The display engine is not coherent with the LLC cache on gen6. As 3478 * a result, we make sure that the pinning that is about to occur is 3479 * done with uncached PTEs. This is lowest common denominator for all 3480 * chipsets. 3481 * 3482 * However for gen6+, we could do better by using the GFDT bit instead 3483 * of uncaching, which would allow us to flush all the LLC-cached data 3484 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 3485 */ 3486 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 3487 if (ret) 3488 return ret; 3489 3490 /* As the user may map the buffer once pinned in the display plane 3491 * (e.g. libkms for the bootup splash), we have to ensure that we 3492 * always use map_and_fenceable for all scanout buffers. 
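 *
 * (Illustrative only -- the modesetting side is expected to do something
 *  like the following when pinning a framebuffer, though the actual code
 *  lives in intel_display.c, not here:
 *
 *	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 *	if (ret == 0)
 *		ret = i915_gem_object_get_fence(obj);
 *
 *  which is why the pin below passes map_and_fenceable = true.)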
3493 */ 3494 ret = i915_gem_object_pin(obj, alignment, true, false); 3495 if (ret) 3496 return ret; 3497 3498 i915_gem_object_flush_cpu_write_domain(obj); 3499 3500 old_write_domain = obj->base.write_domain; 3501 old_read_domains = obj->base.read_domains; 3502 3503 /* It should now be out of any other write domains, and we can update 3504 * the domain values for our changes. 3505 */ 3506 obj->base.write_domain = 0; 3507 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 3508 3509 trace_i915_gem_object_change_domain(obj, 3510 old_read_domains, 3511 old_write_domain); 3512 3513 return 0; 3514 } 3515 3516 int 3517 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 3518 { 3519 int ret; 3520 3521 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 3522 return 0; 3523 3524 ret = i915_gem_object_wait_rendering(obj, false); 3525 if (ret) 3526 return ret; 3527 3528 /* Ensure that we invalidate the GPU's caches and TLBs. */ 3529 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 3530 return 0; 3531 } 3532 3533 /** 3534 * Moves a single object to the CPU read, and possibly write domain. 3535 * 3536 * This function returns when the move is complete, including waiting on 3537 * flushes to occur. 3538 */ 3539 int 3540 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 3541 { 3542 uint32_t old_write_domain, old_read_domains; 3543 int ret; 3544 3545 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 3546 return 0; 3547 3548 ret = i915_gem_object_wait_rendering(obj, !write); 3549 if (ret) 3550 return ret; 3551 3552 i915_gem_object_flush_gtt_write_domain(obj); 3553 3554 old_write_domain = obj->base.write_domain; 3555 old_read_domains = obj->base.read_domains; 3556 3557 /* Flush the CPU cache if it's still invalid. */ 3558 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 3559 i915_gem_clflush_object(obj); 3560 3561 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 3562 } 3563 3564 /* It should now be out of any other write domains, and we can update 3565 * the domain values for our changes. 3566 */ 3567 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 3568 3569 /* If we're writing through the CPU, then the GPU read domains will 3570 * need to be invalidated at next use. 3571 */ 3572 if (write) { 3573 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3574 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3575 } 3576 3577 trace_i915_gem_object_change_domain(obj, 3578 old_read_domains, 3579 old_write_domain); 3580 3581 return 0; 3582 } 3583 3584 /* Throttle our rendering by waiting until the ring has completed our requests 3585 * emitted over 20 msec ago. 3586 * 3587 * Note that if we were to use the current jiffies each time around the loop, 3588 * we wouldn't escape the function with any frames outstanding if the time to 3589 * render a frame was over 20ms. 3590 * 3591 * This should get us reasonable parallelism between CPU and GPU but also 3592 * relatively low latency when blocking on a particular request to finish. 
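 *
 * (Concretely, with the 20ms window used below:
 *
 *	recent_enough = ticks - msecs_to_jiffies(20);
 *
 *  the loop skips requests emitted inside that window and waits on the
 *  newest request older than it, so a client never queues much more than
 *  ~20ms of outstanding work ahead of the GPU.)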
3593 */ 3594 int 3595 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 3596 { 3597 struct drm_i915_private *dev_priv = dev->dev_private; 3598 struct drm_i915_file_private *file_priv = file->driver_priv; 3599 unsigned long recent_enough = ticks - msecs_to_jiffies(20); 3600 struct drm_i915_gem_request *request; 3601 struct intel_ring_buffer *ring = NULL; 3602 u32 seqno = 0; 3603 int ret; 3604 3605 if (atomic_read(&dev_priv->mm.wedged)) 3606 return -EIO; 3607 3608 mtx_enter(&file_priv->mm.lock); 3609 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 3610 if (time_after_eq(request->emitted_ticks, recent_enough)) 3611 break; 3612 3613 ring = request->ring; 3614 seqno = request->seqno; 3615 } 3616 mtx_leave(&file_priv->mm.lock); 3617 3618 if (seqno == 0) 3619 return 0; 3620 3621 ret = __wait_seqno(ring, seqno, true, NULL); 3622 if (ret == 0) 3623 timeout_add_sec(&dev_priv->mm.retire_timer, 0); 3624 3625 return ret; 3626 } 3627 3628 int 3629 i915_gem_object_pin(struct drm_i915_gem_object *obj, 3630 uint32_t alignment, 3631 bool map_and_fenceable, 3632 bool nonblocking) 3633 { 3634 int ret; 3635 3636 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3637 return -EBUSY; 3638 3639 if (obj->gtt_space != NULL) { 3640 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3641 (map_and_fenceable && !obj->map_and_fenceable)) { 3642 WARN(obj->pin_count, 3643 "bo is already pinned with incorrect alignment:" 3644 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 3645 " obj->map_and_fenceable=%d\n", 3646 obj->gtt_offset, alignment, 3647 map_and_fenceable, 3648 obj->map_and_fenceable); 3649 ret = i915_gem_object_unbind(obj); 3650 if (ret) 3651 return ret; 3652 } 3653 } 3654 3655 if (obj->gtt_space == NULL) { 3656 #ifdef notyet 3657 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 3658 #endif 3659 3660 ret = i915_gem_object_bind_to_gtt(obj, alignment, 3661 map_and_fenceable, 3662 nonblocking); 3663 if (ret) 3664 return ret; 3665 3666 #ifdef notyet 3667 if (!dev_priv->mm.aliasing_ppgtt) 3668 #endif 3669 i915_gem_gtt_bind_object(obj, obj->cache_level); 3670 } 3671 3672 if (!obj->has_global_gtt_mapping && map_and_fenceable) 3673 i915_gem_gtt_bind_object(obj, obj->cache_level); 3674 3675 obj->pin_count++; 3676 obj->pin_mappable |= map_and_fenceable; 3677 3678 return 0; 3679 } 3680 3681 void 3682 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3683 { 3684 BUG_ON(obj->pin_count == 0); 3685 BUG_ON(obj->gtt_space == NULL); 3686 3687 if (--obj->pin_count == 0) 3688 obj->pin_mappable = false; 3689 } 3690 3691 int 3692 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 3693 struct drm_file *file) 3694 { 3695 struct drm_i915_gem_pin *args = data; 3696 struct drm_i915_gem_object *obj; 3697 int ret; 3698 3699 ret = i915_mutex_lock_interruptible(dev); 3700 if (ret) 3701 return ret; 3702 3703 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3704 if (&obj->base == NULL) { 3705 ret = -ENOENT; 3706 goto unlock; 3707 } 3708 3709 if (obj->madv != I915_MADV_WILLNEED) { 3710 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 3711 ret = -EINVAL; 3712 goto out; 3713 } 3714 3715 if (obj->pin_filp != NULL && obj->pin_filp != file) { 3716 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 3717 args->handle); 3718 ret = -EINVAL; 3719 goto out; 3720 } 3721 3722 if (obj->user_pin_count == 0) { 3723 ret = i915_gem_object_pin(obj, args->alignment, true, false); 3724 if (ret) 3725 goto out; 3726 } 3727 3728 
obj->user_pin_count++; 3729 obj->pin_filp = file; 3730 3731 /* XXX - flush the CPU caches for pinned objects 3732 * as the X server doesn't manage domains yet 3733 */ 3734 i915_gem_object_flush_cpu_write_domain(obj); 3735 args->offset = obj->gtt_offset; 3736 out: 3737 drm_gem_object_unreference(&obj->base); 3738 unlock: 3739 DRM_UNLOCK(); 3740 return ret; 3741 } 3742 3743 int 3744 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 3745 struct drm_file *file) 3746 { 3747 struct drm_i915_gem_pin *args = data; 3748 struct drm_i915_gem_object *obj; 3749 int ret; 3750 3751 ret = i915_mutex_lock_interruptible(dev); 3752 if (ret) 3753 return ret; 3754 3755 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3756 if (&obj->base == NULL) { 3757 ret = -ENOENT; 3758 goto unlock; 3759 } 3760 3761 if (obj->pin_filp != file) { 3762 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 3763 args->handle); 3764 ret = -EINVAL; 3765 goto out; 3766 } 3767 obj->user_pin_count--; 3768 if (obj->user_pin_count == 0) { 3769 obj->pin_filp = NULL; 3770 i915_gem_object_unpin(obj); 3771 } 3772 3773 out: 3774 drm_gem_object_unreference(&obj->base); 3775 unlock: 3776 DRM_UNLOCK(); 3777 return ret; 3778 } 3779 3780 int 3781 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 3782 struct drm_file *file) 3783 { 3784 struct drm_i915_gem_busy *args = data; 3785 struct drm_i915_gem_object *obj; 3786 int ret; 3787 3788 ret = i915_mutex_lock_interruptible(dev); 3789 if (ret) 3790 return ret; 3791 3792 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3793 if (&obj->base == NULL) { 3794 ret = -ENOENT; 3795 goto unlock; 3796 } 3797 3798 /* Count all active objects as busy, even if they are currently not used 3799 * by the gpu. Users of this interface expect objects to eventually 3800 * become non-busy without any further actions, therefore emit any 3801 * necessary flushes here. 
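 *
 * (The reply is packed as: bit 0 = still busy, bits 16 and up = the
 *  intel_ring_flag() of the last ring to use the object, so a caller can
 *  decode it roughly as:
 *
 *	busy = args.busy & 0x1;
 *	ring = args.busy >> 16;
 * )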
3802 */ 3803 ret = i915_gem_object_flush_active(obj); 3804 3805 args->busy = obj->active; 3806 if (obj->ring) { 3807 BUILD_BUG_ON(I915_NUM_RINGS > 16); 3808 args->busy |= intel_ring_flag(obj->ring) << 16; 3809 } 3810 3811 drm_gem_object_unreference(&obj->base); 3812 unlock: 3813 DRM_UNLOCK(); 3814 return ret; 3815 } 3816 3817 #ifdef notyet 3818 int 3819 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3820 struct drm_file *file_priv) 3821 { 3822 return i915_gem_ring_throttle(dev, file_priv); 3823 } 3824 #endif 3825 3826 int 3827 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3828 struct drm_file *file_priv) 3829 { 3830 struct drm_i915_gem_madvise *args = data; 3831 struct drm_i915_gem_object *obj; 3832 int ret; 3833 3834 switch (args->madv) { 3835 case I915_MADV_DONTNEED: 3836 case I915_MADV_WILLNEED: 3837 break; 3838 default: 3839 return -EINVAL; 3840 } 3841 3842 ret = i915_mutex_lock_interruptible(dev); 3843 if (ret) 3844 return ret; 3845 3846 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3847 if (&obj->base == NULL) { 3848 ret = -ENOENT; 3849 goto unlock; 3850 } 3851 3852 if (obj->pin_count) { 3853 ret = -EINVAL; 3854 goto out; 3855 } 3856 3857 if (obj->madv != __I915_MADV_PURGED) 3858 obj->madv = args->madv; 3859 3860 /* if the object is no longer attached, discard its backing storage */ 3861 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3862 i915_gem_object_truncate(obj); 3863 3864 args->retained = obj->madv != __I915_MADV_PURGED; 3865 3866 out: 3867 drm_gem_object_unreference(&obj->base); 3868 unlock: 3869 DRM_UNLOCK(); 3870 return ret; 3871 } 3872 3873 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3874 const struct drm_i915_gem_object_ops *ops) 3875 { 3876 INIT_LIST_HEAD(&obj->mm_list); 3877 INIT_LIST_HEAD(&obj->gtt_list); 3878 INIT_LIST_HEAD(&obj->ring_list); 3879 INIT_LIST_HEAD(&obj->exec_list); 3880 3881 obj->ops = ops; 3882 3883 obj->fence_reg = I915_FENCE_REG_NONE; 3884 obj->madv = I915_MADV_WILLNEED; 3885 /* Avoid an unnecessary call to unbind on the first bind. */ 3886 obj->map_and_fenceable = true; 3887 3888 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3889 } 3890 3891 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3892 .get_pages = i915_gem_object_get_pages_gtt, 3893 .put_pages = i915_gem_object_put_pages_gtt, 3894 }; 3895 3896 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3897 size_t size) 3898 { 3899 struct drm_i915_gem_object *obj; 3900 3901 obj = pool_get(&dev->objpl, PR_WAITOK | PR_ZERO); 3902 if (obj == NULL) 3903 return NULL; 3904 3905 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3906 pool_put(&dev->objpl, obj); 3907 return NULL; 3908 } 3909 3910 i915_gem_object_init(obj, &i915_gem_object_ops); 3911 3912 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3913 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3914 3915 if (HAS_LLC(dev)) { 3916 /* On some devices, we can have the GPU use the LLC (the CPU 3917 * cache) for about a 10% performance improvement 3918 * compared to uncached. Graphics requests other than 3919 * display scanout are coherent with the CPU in 3920 * accessing this cache. This means in this mode we 3921 * don't need to clflush on the CPU side, and on the 3922 * GPU side we only need to flush internal caches to 3923 * get data visible to the CPU. 3924 * 3925 * However, we maintain the display planes as UC, and so 3926 * need to rebind when first used as such. 
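 *
 * (That rebind is performed by i915_gem_object_pin_to_display_plane()
 *  above via:
 *
 *	i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
 *
 *  which rewrites the object's GTT entries as uncached before scanout.)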
3927 */ 3928 obj->cache_level = I915_CACHE_LLC; 3929 } else 3930 obj->cache_level = I915_CACHE_NONE; 3931 3932 return obj; 3933 } 3934 3935 int i915_gem_init_object(struct drm_gem_object *obj) 3936 { 3937 BUG(); 3938 3939 return 0; 3940 } 3941 3942 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3943 { 3944 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3945 struct drm_device *dev = obj->base.dev; 3946 drm_i915_private_t *dev_priv = dev->dev_private; 3947 3948 if (obj->phys_obj) 3949 i915_gem_detach_phys_object(dev, obj); 3950 3951 obj->pin_count = 0; 3952 i915_gem_object_unbind(obj); 3953 3954 obj->pages_pin_count = 0; 3955 i915_gem_object_put_pages(obj); 3956 i915_gem_object_free_mmap_offset(obj); 3957 3958 BUG_ON(obj->pages); 3959 3960 drm_gem_object_release(&obj->base); 3961 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3962 3963 kfree(obj->bit_17); 3964 pool_put(&dev->objpl, obj); 3965 } 3966 3967 int 3968 i915_gem_idle(struct drm_device *dev) 3969 { 3970 drm_i915_private_t *dev_priv = dev->dev_private; 3971 int ret; 3972 3973 DRM_LOCK(); 3974 3975 if (dev_priv->mm.suspended) { 3976 DRM_UNLOCK(); 3977 return 0; 3978 } 3979 3980 ret = i915_gpu_idle(dev); 3981 if (ret) { 3982 DRM_UNLOCK(); 3983 return ret; 3984 } 3985 i915_gem_retire_requests(dev); 3986 3987 /* Under UMS, be paranoid and evict. */ 3988 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3989 i915_gem_evict_everything(dev); 3990 3991 i915_gem_reset_fences(dev); 3992 3993 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3994 * We need to replace this with a semaphore, or something. 3995 * And not confound mm.suspended! 3996 */ 3997 dev_priv->mm.suspended = 1; 3998 timeout_del(&dev_priv->hangcheck_timer); 3999 4000 i915_kernel_lost_context(dev); 4001 i915_gem_cleanup_ringbuffer(dev); 4002 4003 DRM_UNLOCK(); 4004 4005 /* Cancel the retire work handler, which should be idle now. 
*/ 4006 timeout_del(&dev_priv->mm.retire_timer); 4007 task_del(dev_priv->mm.retire_taskq, &dev_priv->mm.retire_task); 4008 4009 return 0; 4010 } 4011 4012 #ifdef notyet 4013 void i915_gem_l3_remap(struct drm_device *dev) 4014 { 4015 drm_i915_private_t *dev_priv = dev->dev_private; 4016 u32 misccpctl; 4017 int i; 4018 4019 if (!HAS_L3_GPU_CACHE(dev)) 4020 return; 4021 4022 if (!dev_priv->l3_parity.remap_info) 4023 return; 4024 4025 misccpctl = I915_READ(GEN7_MISCCPCTL); 4026 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 4027 POSTING_READ(GEN7_MISCCPCTL); 4028 4029 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 4030 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 4031 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 4032 DRM_DEBUG("0x%x was already programmed to %x\n", 4033 GEN7_L3LOG_BASE + i, remap); 4034 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 4035 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 4036 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 4037 } 4038 4039 /* Make sure all the writes land before disabling dop clock gating */ 4040 POSTING_READ(GEN7_L3LOG_BASE); 4041 4042 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 4043 } 4044 #endif /* notyet */ 4045 4046 void i915_gem_init_swizzling(struct drm_device *dev) 4047 { 4048 drm_i915_private_t *dev_priv = dev->dev_private; 4049 4050 if (INTEL_INFO(dev)->gen < 5 || 4051 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 4052 return; 4053 4054 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 4055 DISP_TILE_SURFACE_SWIZZLING); 4056 4057 if (IS_GEN5(dev)) 4058 return; 4059 4060 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 4061 if (IS_GEN6(dev)) 4062 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 4063 else 4064 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 4065 } 4066 4067 static bool 4068 intel_enable_blt(struct drm_device *dev) 4069 { 4070 if (!HAS_BLT(dev)) 4071 return false; 4072 4073 #ifdef notyet 4074 /* The blitter was dysfunctional on early prototypes */ 4075 if (IS_GEN6(dev) && dev->pdev->revision < 8) { 4076 DRM_INFO("BLT not supported on this pre-production hardware;" 4077 " graphics performance will be degraded.\n"); 4078 return false; 4079 } 4080 #endif 4081 4082 return true; 4083 } 4084 4085 int 4086 i915_gem_init_hw(struct drm_device *dev) 4087 { 4088 drm_i915_private_t *dev_priv = dev->dev_private; 4089 int ret; 4090 4091 #ifdef notyet 4092 if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) 4093 return -EIO; 4094 #endif 4095 4096 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 4097 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 4098 4099 #ifdef notyet 4100 i915_gem_l3_remap(dev); 4101 #endif 4102 4103 i915_gem_init_swizzling(dev); 4104 4105 ret = intel_init_render_ring_buffer(dev); 4106 if (ret) 4107 return ret; 4108 4109 if (HAS_BSD(dev)) { 4110 ret = intel_init_bsd_ring_buffer(dev); 4111 if (ret) 4112 goto cleanup_render_ring; 4113 } 4114 4115 if (intel_enable_blt(dev)) { 4116 ret = intel_init_blt_ring_buffer(dev); 4117 if (ret) 4118 goto cleanup_bsd_ring; 4119 } 4120 4121 dev_priv->next_seqno = 1; 4122 4123 /* 4124 * XXX: There was some w/a described somewhere suggesting loading 4125 * contexts before PPGTT. 
	i915_gem_context_init(dev);
#ifdef notyet
	i915_gem_init_ppgtt(dev);
#endif

	return 0;

cleanup_bsd_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
	return ret;
}

#ifdef notyet
static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}
#endif /* notyet */

int i915_gem_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;
	int ret;

	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;

	DRM_LOCK();
#ifdef notyet
	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
		 * aperture accordingly when using aliasing ppgtt. */
		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;

		i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	} else {
#endif
		/* Let GEM Manage all of the aperture.
		 *
		 * However, leave one page at the end still bound to the scratch
		 * page. There are a number of places where the hardware
		 * apparently prefetches past the end of the object, and we've
		 * seen multiple hangs with the GPU head pointer stuck in a
		 * batchbuffer bound at the last page of the aperture. One page
		 * should be enough to keep any prefetching inside of the
		 * aperture.
		 */
		i915_gem_init_global_gtt(dev, 0, mappable_size,
					 gtt_size);
#ifdef notyet
	}
#endif

	ret = i915_gem_init_hw(dev);
	DRM_UNLOCK();
	if (ret) {
		i915_gem_cleanup_aliasing_ppgtt(dev);
		return ret;
	}

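	/*
	 * The DRI1 batchbuffer toggle below is Linux-only and is compiled
	 * out on this port.
	 */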
#ifdef __linux__
	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->dri1.allow_batchbuffer = 1;
#endif
	return 0;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	for_each_ring(ring, dev_priv, i)
		intel_cleanup_ring_buffer(ring);
}

int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	}

	DRM_LOCK();
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_hw(dev);
	if (ret != 0) {
		DRM_UNLOCK();
		return ret;
	}

	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	DRM_UNLOCK();

	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;

	return 0;

cleanup_ringbuffer:
	DRM_LOCK();
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	DRM_UNLOCK();

	return ret;
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
}

#ifdef notyet
void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
}
#endif /* notyet */

static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	task_set(&dev_priv->mm.retire_task, i915_gem_retire_work_handler,
	    dev_priv, NULL);
	timeout_set(&dev_priv->mm.retire_timer, inteldrm_timeout, dev_priv);
#if 0
	init_completion(&dev_priv->error_completion);
#else
	dev_priv->error_completion = 0;
#endif

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		I915_WRITE(MI_ARB_STATE,
			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;
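	/* 965+ and the 945/G33 family have 16 fence registers, older parts 8. */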
	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	i915_gem_reset_fences(dev);

	i915_gem_detect_bit_6_swizzle(dev);
#if 0
	init_waitqueue_head(&dev_priv->pending_flip_queue);
#endif

	dev_priv->mm.interruptible = true;

#if 0
	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
#endif
}

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_dmamem_alloc(dev->dmat, size, align, 1, size, BUS_DMA_NOCACHE, 0);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

#ifdef notyet
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}
#endif /* notyet */

void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->kva;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
#ifdef notyet
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
#endif
	}
	i915_gem_chipset_flush(dev);

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}
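	/*
	 * Note: the per-page copy loop below is still stubbed out on this
	 * port (notyet), so object contents are not copied into the
	 * physical object yet.
	 */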
	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
#ifdef notyet
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->kva + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
#endif
	}

	return 0;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->kva + args->offset;
	int ret;

	ret = -copyin((char *)(uintptr_t)args->data_ptr,
	    vaddr, args->size);

	i915_gem_chipset_flush(dev);

	return ret;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	mtx_enter(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	mtx_leave(&file_priv->mm.lock);
}

#ifdef notyet
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
{
	if (!mutex_is_locked(mutex))
		return false;

#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
#else
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
	return false;
#endif
}

static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	int nr_to_scan = sc->nr_to_scan;
	bool unlock = true;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
			return 0;

		if (dev_priv->mm.shrinker_no_lock_stealing)
			return 0;

		unlock = false;
	}

	if (nr_to_scan) {
		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
		if (nr_to_scan > 0)
			nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
							false);
		if (nr_to_scan > 0)
			i915_gem_shrink_all(dev_priv);
	}

	cnt = 0;
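	/*
	 * Report only pages that could actually be released: unbound
	 * objects with no pinned pages, plus unpinned inactive objects.
	 */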
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
		if (obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;
	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
		if (obj->pin_count == 0 &&
		    obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;

	if (unlock)
		mutex_unlock(&dev->struct_mutex);
	return cnt;
}
#endif /* notyet */

#define NSEC_PER_SEC 1000000000L

static inline int64_t
timespec_to_ns(const struct timespec *ts)
{
	return ((ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec);
}

static inline int
timespec_to_jiffies(const struct timespec *ts)
{
	long long to_ticks;

	to_ticks = (long long)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return ((int)to_ticks);
}

static struct timespec
ns_to_timespec(const int64_t nsec)
{
	struct timespec ts;
	int32_t rem;

	if (nsec == 0) {
		ts.tv_sec = 0;
		ts.tv_nsec = 0;
		return (ts);
	}

	ts.tv_sec = nsec / NSEC_PER_SEC;
	rem = nsec % NSEC_PER_SEC;
	if (rem < 0) {
		ts.tv_sec--;
		rem += NSEC_PER_SEC;
	}
	ts.tv_nsec = rem;
	return (ts);
}

static inline int
timespec_valid(const struct timespec *ts)
{
	if (ts->tv_sec < 0 || ts->tv_sec > 100000000 ||
	    ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
		return (0);
	return (1);
}