1 /* 2 * Copyright © 2008 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * 26 * Copyright (c) 2011 The FreeBSD Foundation 27 * All rights reserved. 28 * 29 * This software was developed by Konstantin Belousov under sponsorship from 30 * the FreeBSD Foundation. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 
52 * 53 */ 54 55 #include <sys/resourcevar.h> 56 #include <sys/sfbuf.h> 57 58 #include <drm/drmP.h> 59 #include <drm/i915_drm.h> 60 #include "i915_drv.h" 61 #include "intel_drv.h" 62 #include <linux/shmem_fs.h> 63 #include <linux/completion.h> 64 #include <linux/highmem.h> 65 #include <linux/jiffies.h> 66 #include <linux/time.h> 67 68 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 69 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 70 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 71 unsigned alignment, 72 bool map_and_fenceable, 73 bool nonblocking); 74 static int i915_gem_phys_pwrite(struct drm_device *dev, 75 struct drm_i915_gem_object *obj, 76 struct drm_i915_gem_pwrite *args, 77 struct drm_file *file); 78 79 static void i915_gem_write_fence(struct drm_device *dev, int reg, 80 struct drm_i915_gem_object *obj); 81 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 82 struct drm_i915_fence_reg *fence, 83 bool enable); 84 85 static uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, 86 int tiling_mode); 87 static uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev, 88 uint32_t size, int tiling_mode); 89 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj); 90 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 91 92 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 93 { 94 if (obj->tiling_mode) 95 i915_gem_release_mmap(obj); 96 97 /* As we do not have an associated fence register, we will force 98 * a tiling change if we ever need to acquire one. 99 */ 100 obj->fence_dirty = false; 101 obj->fence_reg = I915_FENCE_REG_NONE; 102 } 103 104 static int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj); 105 static bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj); 106 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj); 107 static void i915_gem_reset_fences(struct drm_device *dev); 108 static void i915_gem_lowmem(void *arg); 109 110 /* some bookkeeping */ 111 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 112 size_t size) 113 { 114 dev_priv->mm.object_count++; 115 dev_priv->mm.object_memory += size; 116 } 117 118 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, 119 size_t size) 120 { 121 dev_priv->mm.object_count--; 122 dev_priv->mm.object_memory -= size; 123 } 124 125 static int 126 i915_gem_wait_for_error(struct drm_device *dev) 127 { 128 struct drm_i915_private *dev_priv = dev->dev_private; 129 struct completion *x = &dev_priv->error_completion; 130 int ret; 131 132 if (!atomic_read(&dev_priv->mm.wedged)) 133 return 0; 134 135 /* 136 * Only wait 10 seconds for the gpu reset to complete to avoid hanging 137 * userspace. If it takes that long something really bad is going on and 138 * we should simply try to bail out and fail as gracefully as possible. 139 */ 140 ret = wait_for_completion_interruptible_timeout(x, 10*hz); 141 if (ret == 0) { 142 DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); 143 return -EIO; 144 } else if (ret < 0) { 145 return ret; 146 } 147 148 if (atomic_read(&dev_priv->mm.wedged)) { 149 /* GPU is hung, bump the completion count to account for 150 * the token we just consumed so that we never hit zero and 151 * end up waiting upon a subsequent completion event that 152 * will never happen. 
153 */ 154 lockmgr(&x->wait.lock, LK_EXCLUSIVE); 155 x->done++; 156 lockmgr(&x->wait.lock, LK_RELEASE); 157 } 158 return 0; 159 } 160 161 int i915_mutex_lock_interruptible(struct drm_device *dev) 162 { 163 int ret; 164 165 ret = i915_gem_wait_for_error(dev); 166 if (ret) 167 return ret; 168 169 ret = lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_SLEEPFAIL); 170 if (ret) 171 return -EINTR; 172 173 WARN_ON(i915_verify_lists(dev)); 174 return 0; 175 } 176 177 static inline bool 178 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 179 { 180 return !obj->active; 181 } 182 183 int 184 i915_gem_init_ioctl(struct drm_device *dev, void *data, 185 struct drm_file *file) 186 { 187 struct drm_i915_gem_init *args = data; 188 189 if (drm_core_check_feature(dev, DRIVER_MODESET)) 190 return -ENODEV; 191 192 if (args->gtt_start >= args->gtt_end || 193 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 194 return -EINVAL; 195 196 /* GEM with user mode setting was never supported on ilk and later. */ 197 if (INTEL_INFO(dev)->gen >= 5) 198 return -ENODEV; 199 200 lockmgr(&dev->dev_lock, LK_EXCLUSIVE|LK_RETRY|LK_CANRECURSE); 201 i915_gem_init_global_gtt(dev, args->gtt_start, 202 args->gtt_end, args->gtt_end); 203 lockmgr(&dev->dev_lock, LK_RELEASE); 204 205 return 0; 206 } 207 208 int 209 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, 210 struct drm_file *file) 211 { 212 struct drm_i915_private *dev_priv = dev->dev_private; 213 struct drm_i915_gem_get_aperture *args = data; 214 struct drm_i915_gem_object *obj; 215 size_t pinned; 216 217 pinned = 0; 218 DRM_LOCK(dev); 219 list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) 220 if (obj->pin_count) 221 pinned += obj->gtt_space->size; 222 DRM_UNLOCK(dev); 223 224 args->aper_size = dev_priv->mm.gtt_total; 225 args->aper_available_size = args->aper_size - pinned; 226 227 return 0; 228 } 229 230 static int 231 i915_gem_create(struct drm_file *file, 232 struct drm_device *dev, 233 uint64_t size, 234 uint32_t *handle_p) 235 { 236 struct drm_i915_gem_object *obj; 237 int ret; 238 u32 handle; 239 240 size = roundup(size, PAGE_SIZE); 241 if (size == 0) 242 return -EINVAL; 243 244 /* Allocate the new object */ 245 obj = i915_gem_alloc_object(dev, size); 246 if (obj == NULL) 247 return -ENOMEM; 248 249 handle = 0; 250 ret = drm_gem_handle_create(file, &obj->base, &handle); 251 if (ret) { 252 drm_gem_object_release(&obj->base); 253 i915_gem_info_remove_obj(dev->dev_private, obj->base.size); 254 drm_free(obj, M_DRM); 255 return (-ret); 256 } 257 258 /* drop reference from allocate - handle holds it now */ 259 drm_gem_object_unreference(&obj->base); 260 *handle_p = handle; 261 return 0; 262 } 263 264 int 265 i915_gem_dumb_create(struct drm_file *file, 266 struct drm_device *dev, 267 struct drm_mode_create_dumb *args) 268 { 269 270 /* have to work out size/pitch and return them */ 271 args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64); 272 args->size = args->pitch * args->height; 273 return i915_gem_create(file, dev, 274 args->size, &args->handle); 275 } 276 277 int i915_gem_dumb_destroy(struct drm_file *file, 278 struct drm_device *dev, 279 uint32_t handle) 280 { 281 282 return drm_gem_handle_delete(file, handle); 283 } 284 285 /** 286 * Creates a new mm object and returns a handle to it. 
287 */ 288 int 289 i915_gem_create_ioctl(struct drm_device *dev, void *data, 290 struct drm_file *file) 291 { 292 struct drm_i915_gem_create *args = data; 293 294 return i915_gem_create(file, dev, 295 args->size, &args->handle); 296 } 297 298 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) 299 { 300 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 301 302 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && 303 obj->tiling_mode != I915_TILING_NONE; 304 } 305 306 static inline void vm_page_reference(vm_page_t m) 307 { 308 vm_page_flag_set(m, PG_REFERENCED); 309 } 310 311 static int 312 i915_gem_shmem_pread(struct drm_device *dev, 313 struct drm_i915_gem_object *obj, 314 struct drm_i915_gem_pread *args, 315 struct drm_file *file) 316 { 317 vm_object_t vm_obj; 318 vm_page_t m; 319 struct sf_buf *sf; 320 vm_offset_t mkva; 321 vm_pindex_t obj_pi; 322 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po; 323 324 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 325 326 obj->dirty = 1; 327 vm_obj = obj->base.vm_obj; 328 ret = 0; 329 330 VM_OBJECT_LOCK(vm_obj); 331 vm_object_pip_add(vm_obj, 1); 332 while (args->size > 0) { 333 obj_pi = OFF_TO_IDX(args->offset); 334 obj_po = args->offset & PAGE_MASK; 335 336 m = shmem_read_mapping_page(vm_obj, obj_pi); 337 VM_OBJECT_UNLOCK(vm_obj); 338 339 sf = sf_buf_alloc(m); 340 mkva = sf_buf_kva(sf); 341 length = min(args->size, PAGE_SIZE - obj_po); 342 while (length > 0) { 343 if (do_bit17_swizzling && 344 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) { 345 cnt = roundup2(obj_po + 1, 64); 346 cnt = min(cnt - obj_po, length); 347 swizzled_po = obj_po ^ 64; 348 } else { 349 cnt = length; 350 swizzled_po = obj_po; 351 } 352 ret = -copyout_nofault( 353 (char *)mkva + swizzled_po, 354 (void *)(uintptr_t)args->data_ptr, cnt); 355 if (ret != 0) 356 break; 357 args->data_ptr += cnt; 358 args->size -= cnt; 359 length -= cnt; 360 args->offset += cnt; 361 obj_po += cnt; 362 } 363 sf_buf_free(sf); 364 VM_OBJECT_LOCK(vm_obj); 365 vm_page_reference(m); 366 vm_page_busy_wait(m, FALSE, "i915gem"); 367 vm_page_unwire(m, 1); 368 vm_page_wakeup(m); 369 370 if (ret != 0) 371 break; 372 } 373 vm_object_pip_wakeup(vm_obj); 374 VM_OBJECT_UNLOCK(vm_obj); 375 376 return (ret); 377 } 378 379 /** 380 * Reads data from the object referenced by handle. 381 * 382 * On error, the contents of *data are undefined. 383 */ 384 int 385 i915_gem_pread_ioctl(struct drm_device *dev, void *data, 386 struct drm_file *file) 387 { 388 struct drm_i915_gem_pread *args = data; 389 struct drm_i915_gem_object *obj; 390 int ret = 0; 391 392 if (args->size == 0) 393 return 0; 394 395 ret = i915_mutex_lock_interruptible(dev); 396 if (ret) 397 return ret; 398 399 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 400 if (&obj->base == NULL) { 401 ret = -ENOENT; 402 goto unlock; 403 } 404 405 /* Bounds check source. 
*/ 406 if (args->offset > obj->base.size || 407 args->size > obj->base.size - args->offset) { 408 ret = -EINVAL; 409 goto out; 410 } 411 412 ret = i915_gem_shmem_pread(dev, obj, args, file); 413 out: 414 drm_gem_object_unreference(&obj->base); 415 unlock: 416 DRM_UNLOCK(dev); 417 return ret; 418 } 419 420 static int 421 i915_gem_gtt_write(struct drm_device *dev, struct drm_i915_gem_object *obj, 422 uint64_t data_ptr, uint64_t size, uint64_t offset, struct drm_file *file) 423 { 424 vm_offset_t mkva; 425 int ret; 426 427 /* 428 * Pass the unaligned physical address and size to pmap_mapdev_attr() 429 * so it can properly calculate whether an extra page needs to be 430 * mapped or not to cover the requested range. The function will 431 * add the page offset into the returned mkva for us. 432 */ 433 mkva = (vm_offset_t)pmap_mapdev_attr(dev->agp->base + obj->gtt_offset + 434 offset, size, PAT_WRITE_COMBINING); 435 ret = -copyin_nofault((void *)(uintptr_t)data_ptr, (char *)mkva, size); 436 pmap_unmapdev(mkva, size); 437 return ret; 438 } 439 440 static int 441 i915_gem_shmem_pwrite(struct drm_device *dev, 442 struct drm_i915_gem_object *obj, 443 struct drm_i915_gem_pwrite *args, 444 struct drm_file *file) 445 { 446 vm_object_t vm_obj; 447 vm_page_t m; 448 struct sf_buf *sf; 449 vm_offset_t mkva; 450 vm_pindex_t obj_pi; 451 int cnt, do_bit17_swizzling, length, obj_po, ret, swizzled_po; 452 453 do_bit17_swizzling = 0; 454 455 obj->dirty = 1; 456 vm_obj = obj->base.vm_obj; 457 ret = 0; 458 459 VM_OBJECT_LOCK(vm_obj); 460 vm_object_pip_add(vm_obj, 1); 461 while (args->size > 0) { 462 obj_pi = OFF_TO_IDX(args->offset); 463 obj_po = args->offset & PAGE_MASK; 464 465 m = shmem_read_mapping_page(vm_obj, obj_pi); 466 VM_OBJECT_UNLOCK(vm_obj); 467 468 sf = sf_buf_alloc(m); 469 mkva = sf_buf_kva(sf); 470 length = min(args->size, PAGE_SIZE - obj_po); 471 while (length > 0) { 472 if (do_bit17_swizzling && 473 (VM_PAGE_TO_PHYS(m) & (1 << 17)) != 0) { 474 cnt = roundup2(obj_po + 1, 64); 475 cnt = min(cnt - obj_po, length); 476 swizzled_po = obj_po ^ 64; 477 } else { 478 cnt = length; 479 swizzled_po = obj_po; 480 } 481 ret = -copyin_nofault( 482 (void *)(uintptr_t)args->data_ptr, 483 (char *)mkva + swizzled_po, cnt); 484 if (ret != 0) 485 break; 486 args->data_ptr += cnt; 487 args->size -= cnt; 488 length -= cnt; 489 args->offset += cnt; 490 obj_po += cnt; 491 } 492 sf_buf_free(sf); 493 VM_OBJECT_LOCK(vm_obj); 494 vm_page_dirty(m); 495 vm_page_reference(m); 496 vm_page_busy_wait(m, FALSE, "i915gem"); 497 vm_page_unwire(m, 1); 498 vm_page_wakeup(m); 499 500 if (ret != 0) 501 break; 502 } 503 vm_object_pip_wakeup(vm_obj); 504 VM_OBJECT_UNLOCK(vm_obj); 505 506 return (ret); 507 } 508 509 /** 510 * Writes data to the object referenced by handle. 511 * 512 * On error, the contents of the buffer that were to be modified are undefined. 
513 */ 514 int 515 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, 516 struct drm_file *file) 517 { 518 struct drm_i915_gem_pwrite *args = data; 519 struct drm_i915_gem_object *obj; 520 vm_page_t *ma; 521 vm_offset_t start, end; 522 int npages, ret; 523 524 if (args->size == 0) 525 return 0; 526 527 start = trunc_page(args->data_ptr); 528 end = round_page(args->data_ptr + args->size); 529 npages = howmany(end - start, PAGE_SIZE); 530 ma = kmalloc(npages * sizeof(vm_page_t), M_DRM, M_WAITOK | 531 M_ZERO); 532 npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 533 (vm_offset_t)args->data_ptr, args->size, 534 VM_PROT_READ, ma, npages); 535 if (npages == -1) { 536 ret = -EFAULT; 537 goto free_ma; 538 } 539 540 ret = i915_mutex_lock_interruptible(dev); 541 if (ret != 0) 542 goto unlocked; 543 544 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 545 if (&obj->base == NULL) { 546 ret = -ENOENT; 547 goto unlock; 548 } 549 550 /* Bounds check destination. */ 551 if (args->offset > obj->base.size || 552 args->size > obj->base.size - args->offset) { 553 ret = -EINVAL; 554 goto out; 555 } 556 557 if (obj->phys_obj) { 558 ret = i915_gem_phys_pwrite(dev, obj, args, file); 559 } else if (obj->gtt_space && 560 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 561 ret = i915_gem_object_pin(obj, 0, true, false); 562 if (ret != 0) 563 goto out; 564 ret = i915_gem_object_set_to_gtt_domain(obj, true); 565 if (ret != 0) 566 goto out_unpin; 567 ret = i915_gem_object_put_fence(obj); 568 if (ret != 0) 569 goto out_unpin; 570 ret = i915_gem_gtt_write(dev, obj, args->data_ptr, args->size, 571 args->offset, file); 572 out_unpin: 573 i915_gem_object_unpin(obj); 574 } else { 575 ret = i915_gem_object_set_to_cpu_domain(obj, true); 576 if (ret != 0) 577 goto out; 578 ret = i915_gem_shmem_pwrite(dev, obj, args, file); 579 } 580 out: 581 drm_gem_object_unreference(&obj->base); 582 unlock: 583 DRM_UNLOCK(dev); 584 unlocked: 585 vm_page_unhold_pages(ma, npages); 586 free_ma: 587 drm_free(ma, M_DRM); 588 return ret; 589 } 590 591 int 592 i915_gem_check_wedge(struct drm_i915_private *dev_priv, 593 bool interruptible) 594 { 595 if (atomic_read(&dev_priv->mm.wedged)) { 596 struct completion *x = &dev_priv->error_completion; 597 bool recovery_complete; 598 599 /* Give the error handler a chance to run. */ 600 lockmgr(&x->wait.lock, LK_EXCLUSIVE); 601 recovery_complete = x->done > 0; 602 lockmgr(&x->wait.lock, LK_RELEASE); 603 604 /* Non-interruptible callers can't handle -EAGAIN, hence return 605 * -EIO unconditionally for these. */ 606 if (!interruptible) 607 return -EIO; 608 609 /* Recovery complete, but still wedged means reset failure. */ 610 if (recovery_complete) 611 return -EIO; 612 613 return -EAGAIN; 614 } 615 616 return 0; 617 } 618 619 /* 620 * Compare seqno against outstanding lazy request. Emit a request if they are 621 * equal. 622 */ 623 static int 624 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) 625 { 626 int ret; 627 628 DRM_LOCK_ASSERT(ring->dev); 629 630 ret = 0; 631 if (seqno == ring->outstanding_lazy_request) 632 ret = i915_add_request(ring, NULL, NULL); 633 634 return ret; 635 } 636 637 /** 638 * __wait_seqno - wait until execution of seqno has finished 639 * @ring: the ring expected to report seqno 640 * @seqno: duh! 641 * @interruptible: do an interruptible wait (normally yes) 642 * @timeout: in - how long to wait (NULL forever); out - how much time remaining 643 * 644 * Returns 0 if the seqno was found within the alloted time. 
Else returns the 645 * errno with remaining time filled in timeout argument. 646 */ 647 static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, 648 bool interruptible, struct timespec *timeout) 649 { 650 drm_i915_private_t *dev_priv = ring->dev->dev_private; 651 struct timespec before, now, wait_time={1,0}; 652 unsigned long timeout_jiffies; 653 long end; 654 bool wait_forever = true; 655 int ret; 656 657 if (i915_seqno_passed(ring->get_seqno(ring, true), seqno)) 658 return 0; 659 660 if (timeout != NULL) { 661 wait_time = *timeout; 662 wait_forever = false; 663 } 664 665 timeout_jiffies = timespec_to_jiffies(&wait_time); 666 667 if (WARN_ON(!ring->irq_get(ring))) 668 return -ENODEV; 669 670 /* Record current time in case interrupted by signal, or wedged * */ 671 getrawmonotonic(&before); 672 673 #define EXIT_COND \ 674 (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ 675 atomic_read(&dev_priv->mm.wedged)) 676 do { 677 if (interruptible) 678 end = wait_event_interruptible_timeout(ring->irq_queue, 679 EXIT_COND, 680 timeout_jiffies); 681 else 682 end = wait_event_timeout(ring->irq_queue, EXIT_COND, 683 timeout_jiffies); 684 685 ret = i915_gem_check_wedge(dev_priv, interruptible); 686 if (ret) 687 end = ret; 688 } while (end == 0 && wait_forever); 689 690 getrawmonotonic(&now); 691 692 ring->irq_put(ring); 693 #undef EXIT_COND 694 695 if (timeout) { 696 struct timespec sleep_time = timespec_sub(now, before); 697 *timeout = timespec_sub(*timeout, sleep_time); 698 } 699 700 switch (end) { 701 case -EIO: 702 case -EAGAIN: /* Wedged */ 703 case -ERESTARTSYS: /* Signal */ 704 return (int)end; 705 case 0: /* Timeout */ 706 if (timeout) 707 set_normalized_timespec(timeout, 0, 0); 708 return -ETIMEDOUT; /* -ETIME on Linux */ 709 default: /* Completed */ 710 WARN_ON(end < 0); /* We're not aware of other errors */ 711 return 0; 712 } 713 } 714 715 /** 716 * Waits for a sequence number to be signaled, and cleans up the 717 * request and object lists appropriately for that event. 718 */ 719 int 720 i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) 721 { 722 struct drm_device *dev = ring->dev; 723 struct drm_i915_private *dev_priv = dev->dev_private; 724 bool interruptible = dev_priv->mm.interruptible; 725 int ret; 726 727 DRM_LOCK_ASSERT(dev); 728 BUG_ON(seqno == 0); 729 730 ret = i915_gem_check_wedge(dev_priv, interruptible); 731 if (ret) 732 return ret; 733 734 ret = i915_gem_check_olr(ring, seqno); 735 if (ret) 736 return ret; 737 738 return __wait_seqno(ring, seqno, interruptible, NULL); 739 } 740 741 /** 742 * Ensures that all rendering to the object has completed and the object is 743 * safe to unbind from the GTT or access from the CPU. 744 */ 745 static __must_check int 746 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, 747 bool readonly) 748 { 749 struct intel_ring_buffer *ring = obj->ring; 750 u32 seqno; 751 int ret; 752 753 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 754 if (seqno == 0) 755 return 0; 756 757 ret = i915_wait_seqno(ring, seqno); 758 if (ret) 759 return ret; 760 761 i915_gem_retire_requests_ring(ring); 762 763 /* Manually manage the write flush as we may have not yet 764 * retired the buffer. 765 */ 766 if (obj->last_write_seqno && 767 i915_seqno_passed(seqno, obj->last_write_seqno)) { 768 obj->last_write_seqno = 0; 769 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 770 } 771 772 return 0; 773 } 774 775 /* A nonblocking variant of the above wait. 
This is a highly dangerous routine 776 * as the object state may change during this call. 777 */ 778 static __must_check int 779 i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, 780 bool readonly) 781 { 782 struct drm_device *dev = obj->base.dev; 783 struct drm_i915_private *dev_priv = dev->dev_private; 784 struct intel_ring_buffer *ring = obj->ring; 785 u32 seqno; 786 int ret; 787 788 DRM_LOCK_ASSERT(dev); 789 BUG_ON(!dev_priv->mm.interruptible); 790 791 seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno; 792 if (seqno == 0) 793 return 0; 794 795 ret = i915_gem_check_wedge(dev_priv, true); 796 if (ret) 797 return ret; 798 799 ret = i915_gem_check_olr(ring, seqno); 800 if (ret) 801 return ret; 802 803 DRM_UNLOCK(dev); 804 ret = __wait_seqno(ring, seqno, true, NULL); 805 DRM_LOCK(dev); 806 807 i915_gem_retire_requests_ring(ring); 808 809 /* Manually manage the write flush as we may have not yet 810 * retired the buffer. 811 */ 812 if (obj->last_write_seqno && 813 i915_seqno_passed(seqno, obj->last_write_seqno)) { 814 obj->last_write_seqno = 0; 815 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 816 } 817 818 return ret; 819 } 820 821 /** 822 * Called when user space prepares to use an object with the CPU, either 823 * through the mmap ioctl's mapping or a GTT mapping. 824 */ 825 int 826 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, 827 struct drm_file *file) 828 { 829 struct drm_i915_gem_set_domain *args = data; 830 struct drm_i915_gem_object *obj; 831 uint32_t read_domains = args->read_domains; 832 uint32_t write_domain = args->write_domain; 833 int ret; 834 835 /* Only handle setting domains to types used by the CPU. */ 836 if (write_domain & I915_GEM_GPU_DOMAINS) 837 return -EINVAL; 838 839 if (read_domains & I915_GEM_GPU_DOMAINS) 840 return -EINVAL; 841 842 /* Having something in the write domain implies it's in the read 843 * domain, and only that read domain. Enforce that in the request. 844 */ 845 if (write_domain != 0 && read_domains != write_domain) 846 return -EINVAL; 847 848 ret = i915_mutex_lock_interruptible(dev); 849 if (ret) 850 return ret; 851 852 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 853 if (&obj->base == NULL) { 854 ret = -ENOENT; 855 goto unlock; 856 } 857 858 /* Try to flush the object off the GPU without holding the lock. 859 * We will repeat the flush holding the lock in the normal manner 860 * to catch cases where we are gazumped. 861 */ 862 ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain); 863 if (ret) 864 goto unref; 865 866 if (read_domains & I915_GEM_DOMAIN_GTT) { 867 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); 868 869 /* Silently promote "you're not bound, there was nothing to do" 870 * to success, since the client was just asking us to 871 * make sure everything was done. 
872 */ 873 if (ret == -EINVAL) 874 ret = 0; 875 } else { 876 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); 877 } 878 879 unref: 880 drm_gem_object_unreference(&obj->base); 881 unlock: 882 DRM_UNLOCK(dev); 883 return ret; 884 } 885 886 /** 887 * Called when user space has done writes to this buffer 888 */ 889 int 890 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, 891 struct drm_file *file) 892 { 893 struct drm_i915_gem_sw_finish *args = data; 894 struct drm_i915_gem_object *obj; 895 int ret = 0; 896 897 ret = i915_mutex_lock_interruptible(dev); 898 if (ret) 899 return ret; 900 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 901 if (&obj->base == NULL) { 902 ret = -ENOENT; 903 goto unlock; 904 } 905 906 /* Pinned buffers may be scanout, so flush the cache */ 907 if (obj->pin_count) 908 i915_gem_object_flush_cpu_write_domain(obj); 909 910 drm_gem_object_unreference(&obj->base); 911 unlock: 912 DRM_UNLOCK(dev); 913 return ret; 914 } 915 916 /** 917 * Maps the contents of an object, returning the address it is mapped 918 * into. 919 * 920 * While the mapping holds a reference on the contents of the object, it doesn't 921 * imply a ref on the object itself. 922 */ 923 int 924 i915_gem_mmap_ioctl(struct drm_device *dev, void *data, 925 struct drm_file *file) 926 { 927 struct drm_i915_gem_mmap *args = data; 928 struct drm_gem_object *obj; 929 struct proc *p = curproc; 930 vm_map_t map = &p->p_vmspace->vm_map; 931 vm_offset_t addr; 932 vm_size_t size; 933 int error = 0, rv; 934 935 obj = drm_gem_object_lookup(dev, file, args->handle); 936 if (obj == NULL) 937 return -ENOENT; 938 939 if (args->size == 0) 940 goto out; 941 942 size = round_page(args->size); 943 if (map->size + size > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { 944 error = ENOMEM; 945 goto out; 946 } 947 948 addr = 0; 949 vm_object_hold(obj->vm_obj); 950 vm_object_reference_locked(obj->vm_obj); 951 vm_object_drop(obj->vm_obj); 952 rv = vm_map_find(map, obj->vm_obj, NULL, 953 args->offset, &addr, args->size, 954 PAGE_SIZE, /* align */ 955 TRUE, /* fitit */ 956 VM_MAPTYPE_NORMAL, /* maptype */ 957 VM_PROT_READ | VM_PROT_WRITE, /* prot */ 958 VM_PROT_READ | VM_PROT_WRITE, /* max */ 959 MAP_SHARED /* cow */); 960 if (rv != KERN_SUCCESS) { 961 vm_object_deallocate(obj->vm_obj); 962 error = -vm_mmap_to_errno(rv); 963 } else { 964 args->addr_ptr = (uint64_t)addr; 965 } 966 out: 967 drm_gem_object_unreference(obj); 968 return (error); 969 } 970 971 /** 972 * i915_gem_fault - fault a page into the GTT 973 * vma: VMA in question 974 * vmf: fault info 975 * 976 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped 977 * from userspace. The fault handler takes care of binding the object to 978 * the GTT (if needed), allocating and programming a fence register (again, 979 * only if needed based on whether the old reg is still valid or the object 980 * is tiled) and inserting a new PTE into the faulting process. 981 * 982 * Note that the faulting process may involve evicting existing objects 983 * from the GTT and/or fence registers to make room. So performance may 984 * suffer if the GTT working set is large or there are few fence registers 985 * left. 986 */ 987 988 /** 989 * i915_gem_release_mmap - remove physical page mappings 990 * @obj: obj in question 991 * 992 * Preserve the reservation of the mmapping with the DRM core code, but 993 * relinquish ownership of the pages back to the system. 
994 * 995 * It is vital that we remove the page mapping if we have mapped a tiled 996 * object through the GTT and then lose the fence register due to 997 * resource pressure. Similarly if the object has been moved out of the 998 * aperture, than pages mapped into userspace must be revoked. Removing the 999 * mapping will then trigger a page fault on the next user access, allowing 1000 * fixup by i915_gem_fault(). 1001 */ 1002 void 1003 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 1004 { 1005 vm_object_t devobj; 1006 vm_page_t m; 1007 int i, page_count; 1008 1009 if (!obj->fault_mappable) 1010 return; 1011 1012 devobj = cdev_pager_lookup(obj); 1013 if (devobj != NULL) { 1014 page_count = OFF_TO_IDX(obj->base.size); 1015 1016 VM_OBJECT_LOCK(devobj); 1017 for (i = 0; i < page_count; i++) { 1018 m = vm_page_lookup_busy_wait(devobj, i, TRUE, "915unm"); 1019 if (m == NULL) 1020 continue; 1021 cdev_pager_free_page(devobj, m); 1022 } 1023 VM_OBJECT_UNLOCK(devobj); 1024 vm_object_deallocate(devobj); 1025 } 1026 1027 obj->fault_mappable = false; 1028 } 1029 1030 static uint32_t 1031 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) 1032 { 1033 uint32_t gtt_size; 1034 1035 if (INTEL_INFO(dev)->gen >= 4 || 1036 tiling_mode == I915_TILING_NONE) 1037 return size; 1038 1039 /* Previous chips need a power-of-two fence region when tiling */ 1040 if (INTEL_INFO(dev)->gen == 3) 1041 gtt_size = 1024*1024; 1042 else 1043 gtt_size = 512*1024; 1044 1045 while (gtt_size < size) 1046 gtt_size <<= 1; 1047 1048 return gtt_size; 1049 } 1050 1051 /** 1052 * i915_gem_get_gtt_alignment - return required GTT alignment for an object 1053 * @obj: object to check 1054 * 1055 * Return the required GTT alignment for an object, taking into account 1056 * potential fence register mapping. 1057 */ 1058 static uint32_t 1059 i915_gem_get_gtt_alignment(struct drm_device *dev, 1060 uint32_t size, 1061 int tiling_mode) 1062 { 1063 1064 /* 1065 * Minimum alignment is 4k (GTT page size), but might be greater 1066 * if a fence register is needed for the object. 1067 */ 1068 if (INTEL_INFO(dev)->gen >= 4 || 1069 tiling_mode == I915_TILING_NONE) 1070 return 4096; 1071 1072 /* 1073 * Previous chips need to be aligned to the size of the smallest 1074 * fence register that can contain the object. 1075 */ 1076 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1077 } 1078 1079 /** 1080 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an 1081 * unfenced object 1082 * @dev: the device 1083 * @size: size of the object 1084 * @tiling_mode: tiling mode of the object 1085 * 1086 * Return the required GTT alignment for an object, only taking into account 1087 * unfenced tiled surface requirements. 1088 */ 1089 uint32_t 1090 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, 1091 uint32_t size, 1092 int tiling_mode) 1093 { 1094 /* 1095 * Minimum alignment is 4k (GTT page size) for sane hw. 1096 */ 1097 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || 1098 tiling_mode == I915_TILING_NONE) 1099 return 4096; 1100 1101 /* Previous hardware however needs to be aligned to a power-of-two 1102 * tile height. The simplest method for determining this is to reuse 1103 * the power-of-tile object size. 
1104 */ 1105 return i915_gem_get_gtt_size(dev, size, tiling_mode); 1106 } 1107 1108 int 1109 i915_gem_mmap_gtt(struct drm_file *file, 1110 struct drm_device *dev, 1111 uint32_t handle, 1112 uint64_t *offset) 1113 { 1114 struct drm_i915_private *dev_priv = dev->dev_private; 1115 struct drm_i915_gem_object *obj; 1116 int ret; 1117 1118 ret = i915_mutex_lock_interruptible(dev); 1119 if (ret) 1120 return ret; 1121 1122 obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); 1123 if (&obj->base == NULL) { 1124 ret = -ENOENT; 1125 goto unlock; 1126 } 1127 1128 if (obj->base.size > dev_priv->mm.gtt_mappable_end) { 1129 ret = -E2BIG; 1130 goto out; 1131 } 1132 1133 if (obj->madv != I915_MADV_WILLNEED) { 1134 DRM_ERROR("Attempting to mmap a purgeable buffer\n"); 1135 ret = -EINVAL; 1136 goto out; 1137 } 1138 1139 ret = drm_gem_create_mmap_offset(&obj->base); 1140 if (ret) 1141 goto out; 1142 1143 *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | 1144 DRM_GEM_MAPPING_KEY; 1145 out: 1146 drm_gem_object_unreference(&obj->base); 1147 unlock: 1148 DRM_UNLOCK(dev); 1149 return ret; 1150 } 1151 1152 /** 1153 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 1154 * @dev: DRM device 1155 * @data: GTT mapping ioctl data 1156 * @file: GEM object info 1157 * 1158 * Simply returns the fake offset to userspace so it can mmap it. 1159 * The mmap call will end up in drm_gem_mmap(), which will set things 1160 * up so we can get faults in the handler above. 1161 * 1162 * The fault handler will take care of binding the object into the GTT 1163 * (since it may have been evicted to make room for something), allocating 1164 * a fence register, and mapping the appropriate aperture address into 1165 * userspace. 1166 */ 1167 int 1168 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 1169 struct drm_file *file) 1170 { 1171 struct drm_i915_gem_mmap_gtt *args = data; 1172 1173 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1174 } 1175 1176 /* Immediately discard the backing storage */ 1177 static void 1178 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 1179 { 1180 vm_object_t vm_obj; 1181 1182 vm_obj = obj->base.vm_obj; 1183 VM_OBJECT_LOCK(vm_obj); 1184 vm_object_page_remove(vm_obj, 0, 0, false); 1185 VM_OBJECT_UNLOCK(vm_obj); 1186 obj->madv = __I915_MADV_PURGED; 1187 } 1188 1189 static inline int 1190 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) 1191 { 1192 return obj->madv == I915_MADV_DONTNEED; 1193 } 1194 1195 static void 1196 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) 1197 { 1198 vm_page_t m; 1199 int page_count, i; 1200 1201 BUG_ON(obj->madv == __I915_MADV_PURGED); 1202 1203 if (obj->tiling_mode != I915_TILING_NONE) 1204 i915_gem_object_save_bit_17_swizzle(obj); 1205 if (obj->madv == I915_MADV_DONTNEED) 1206 obj->dirty = 0; 1207 page_count = obj->base.size / PAGE_SIZE; 1208 VM_OBJECT_LOCK(obj->base.vm_obj); 1209 #if GEM_PARANOID_CHECK_GTT 1210 i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count); 1211 #endif 1212 for (i = 0; i < page_count; i++) { 1213 m = obj->pages[i]; 1214 if (obj->dirty) 1215 vm_page_dirty(m); 1216 if (obj->madv == I915_MADV_WILLNEED) 1217 vm_page_reference(m); 1218 vm_page_busy_wait(obj->pages[i], FALSE, "i915gem"); 1219 vm_page_unwire(obj->pages[i], 1); 1220 vm_page_wakeup(obj->pages[i]); 1221 } 1222 VM_OBJECT_UNLOCK(obj->base.vm_obj); 1223 obj->dirty = 0; 1224 drm_free(obj->pages, M_DRM); 1225 obj->pages = NULL; 1226 } 1227 1228 static int 1229 
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 1230 { 1231 struct drm_device *dev; 1232 vm_object_t vm_obj; 1233 int page_count, i, j; 1234 struct vm_page *page; 1235 1236 dev = obj->base.dev; 1237 KASSERT(obj->pages == NULL, ("Obj already has pages")); 1238 page_count = obj->base.size / PAGE_SIZE; 1239 obj->pages = kmalloc(page_count * sizeof(vm_page_t), M_DRM, 1240 M_WAITOK); 1241 1242 vm_obj = obj->base.vm_obj; 1243 VM_OBJECT_LOCK(vm_obj); 1244 1245 for (i = 0; i < page_count; i++) { 1246 page = shmem_read_mapping_page(vm_obj, i); 1247 if (IS_ERR(page)) 1248 goto err_pages; 1249 1250 obj->pages[i] = page; 1251 } 1252 1253 VM_OBJECT_UNLOCK(vm_obj); 1254 if (i915_gem_object_needs_bit17_swizzle(obj)) 1255 i915_gem_object_do_bit_17_swizzle(obj); 1256 1257 return 0; 1258 1259 err_pages: 1260 for (j = 0; j < i; j++) { 1261 page = obj->pages[j]; 1262 vm_page_busy_wait(page, FALSE, "i915gem"); 1263 vm_page_unwire(page, 0); 1264 vm_page_wakeup(page); 1265 } 1266 VM_OBJECT_UNLOCK(vm_obj); 1267 drm_free(obj->pages, M_DRM); 1268 obj->pages = NULL; 1269 return (-EIO); 1270 } 1271 1272 void 1273 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, 1274 struct intel_ring_buffer *ring) 1275 { 1276 struct drm_device *dev = obj->base.dev; 1277 struct drm_i915_private *dev_priv = dev->dev_private; 1278 u32 seqno = intel_ring_get_seqno(ring); 1279 1280 BUG_ON(ring == NULL); 1281 obj->ring = ring; 1282 1283 /* Add a reference if we're newly entering the active list. */ 1284 if (!obj->active) { 1285 drm_gem_object_reference(&obj->base); 1286 obj->active = 1; 1287 } 1288 1289 /* Move from whatever list we were on to the tail of execution. */ 1290 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); 1291 list_move_tail(&obj->ring_list, &ring->active_list); 1292 1293 obj->last_read_seqno = seqno; 1294 1295 if (obj->fenced_gpu_access) { 1296 obj->last_fenced_seqno = seqno; 1297 1298 /* Bump MRU to take account of the delayed flush */ 1299 if (obj->fence_reg != I915_FENCE_REG_NONE) { 1300 struct drm_i915_fence_reg *reg; 1301 1302 reg = &dev_priv->fence_regs[obj->fence_reg]; 1303 list_move_tail(®->lru_list, 1304 &dev_priv->mm.fence_list); 1305 } 1306 } 1307 } 1308 1309 static void 1310 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) 1311 { 1312 struct drm_device *dev = obj->base.dev; 1313 struct drm_i915_private *dev_priv = dev->dev_private; 1314 1315 BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); 1316 BUG_ON(!obj->active); 1317 1318 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 1319 1320 list_del_init(&obj->ring_list); 1321 obj->ring = NULL; 1322 1323 obj->last_read_seqno = 0; 1324 obj->last_write_seqno = 0; 1325 obj->base.write_domain = 0; 1326 1327 obj->last_fenced_seqno = 0; 1328 obj->fenced_gpu_access = false; 1329 1330 obj->active = 0; 1331 drm_gem_object_unreference(&obj->base); 1332 1333 WARN_ON(i915_verify_lists(dev)); 1334 } 1335 1336 static int 1337 i915_gem_handle_seqno_wrap(struct drm_device *dev) 1338 { 1339 struct drm_i915_private *dev_priv = dev->dev_private; 1340 struct intel_ring_buffer *ring; 1341 int ret, i, j; 1342 1343 /* The hardware uses various monotonic 32-bit counters, if we 1344 * detect that they will wraparound we need to idle the GPU 1345 * and reset those counters. 
1346 */ 1347 ret = 0; 1348 for_each_ring(ring, dev_priv, i) { 1349 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 1350 ret |= ring->sync_seqno[j] != 0; 1351 } 1352 if (ret == 0) 1353 return ret; 1354 1355 ret = i915_gpu_idle(dev); 1356 if (ret) 1357 return ret; 1358 1359 i915_gem_retire_requests(dev); 1360 for_each_ring(ring, dev_priv, i) { 1361 for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++) 1362 ring->sync_seqno[j] = 0; 1363 } 1364 1365 return 0; 1366 } 1367 1368 int 1369 i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) 1370 { 1371 struct drm_i915_private *dev_priv = dev->dev_private; 1372 1373 /* reserve 0 for non-seqno */ 1374 if (dev_priv->next_seqno == 0) { 1375 int ret = i915_gem_handle_seqno_wrap(dev); 1376 if (ret) 1377 return ret; 1378 1379 dev_priv->next_seqno = 1; 1380 } 1381 1382 *seqno = dev_priv->next_seqno++; 1383 return 0; 1384 } 1385 1386 int 1387 i915_add_request(struct intel_ring_buffer *ring, 1388 struct drm_file *file, 1389 u32 *out_seqno) 1390 { 1391 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1392 struct drm_i915_gem_request *request; 1393 u32 request_ring_position; 1394 int was_empty; 1395 int ret; 1396 1397 /* 1398 * Emit any outstanding flushes - execbuf can fail to emit the flush 1399 * after having emitted the batchbuffer command. Hence we need to fix 1400 * things up similar to emitting the lazy request. The difference here 1401 * is that the flush _must_ happen before the next request, no matter 1402 * what. 1403 */ 1404 ret = intel_ring_flush_all_caches(ring); 1405 if (ret) 1406 return ret; 1407 1408 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK | M_ZERO); 1409 if (request == NULL) 1410 return -ENOMEM; 1411 1412 1413 /* Record the position of the start of the request so that 1414 * should we detect the updated seqno part-way through the 1415 * GPU processing the request, we never over-estimate the 1416 * position of the head. 
1417 */ 1418 request_ring_position = intel_ring_get_tail(ring); 1419 1420 ret = ring->add_request(ring); 1421 if (ret) { 1422 kfree(request, M_DRM); 1423 return ret; 1424 } 1425 1426 request->seqno = intel_ring_get_seqno(ring); 1427 request->ring = ring; 1428 request->tail = request_ring_position; 1429 request->emitted_jiffies = jiffies; 1430 was_empty = list_empty(&ring->request_list); 1431 list_add_tail(&request->list, &ring->request_list); 1432 request->file_priv = NULL; 1433 1434 if (file) { 1435 struct drm_i915_file_private *file_priv = file->driver_priv; 1436 1437 spin_lock(&file_priv->mm.lock); 1438 request->file_priv = file_priv; 1439 list_add_tail(&request->client_list, 1440 &file_priv->mm.request_list); 1441 spin_unlock(&file_priv->mm.lock); 1442 } 1443 1444 ring->outstanding_lazy_request = 0; 1445 1446 if (!dev_priv->mm.suspended) { 1447 if (i915_enable_hangcheck) { 1448 mod_timer(&dev_priv->hangcheck_timer, 1449 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); 1450 } 1451 if (was_empty) { 1452 queue_delayed_work(dev_priv->wq, 1453 &dev_priv->mm.retire_work, 1454 round_jiffies_up_relative(hz)); 1455 intel_mark_busy(dev_priv->dev); 1456 } 1457 } 1458 1459 if (out_seqno) 1460 *out_seqno = request->seqno; 1461 return 0; 1462 } 1463 1464 static inline void 1465 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) 1466 { 1467 struct drm_i915_file_private *file_priv = request->file_priv; 1468 1469 if (!file_priv) 1470 return; 1471 1472 spin_lock(&file_priv->mm.lock); 1473 if (request->file_priv) { 1474 list_del(&request->client_list); 1475 request->file_priv = NULL; 1476 } 1477 spin_unlock(&file_priv->mm.lock); 1478 } 1479 1480 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, 1481 struct intel_ring_buffer *ring) 1482 { 1483 while (!list_empty(&ring->request_list)) { 1484 struct drm_i915_gem_request *request; 1485 1486 request = list_first_entry(&ring->request_list, 1487 struct drm_i915_gem_request, 1488 list); 1489 1490 list_del(&request->list); 1491 i915_gem_request_remove_from_client(request); 1492 drm_free(request, M_DRM); 1493 } 1494 1495 while (!list_empty(&ring->active_list)) { 1496 struct drm_i915_gem_object *obj; 1497 1498 obj = list_first_entry(&ring->active_list, 1499 struct drm_i915_gem_object, 1500 ring_list); 1501 1502 i915_gem_object_move_to_inactive(obj); 1503 } 1504 } 1505 1506 static void i915_gem_reset_fences(struct drm_device *dev) 1507 { 1508 struct drm_i915_private *dev_priv = dev->dev_private; 1509 int i; 1510 1511 for (i = 0; i < dev_priv->num_fence_regs; i++) { 1512 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1513 1514 i915_gem_write_fence(dev, i, NULL); 1515 1516 if (reg->obj) 1517 i915_gem_object_fence_lost(reg->obj); 1518 1519 reg->pin_count = 0; 1520 reg->obj = NULL; 1521 INIT_LIST_HEAD(®->lru_list); 1522 } 1523 1524 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 1525 } 1526 1527 void i915_gem_reset(struct drm_device *dev) 1528 { 1529 struct drm_i915_private *dev_priv = dev->dev_private; 1530 struct drm_i915_gem_object *obj; 1531 struct intel_ring_buffer *ring; 1532 int i; 1533 1534 for_each_ring(ring, dev_priv, i) 1535 i915_gem_reset_ring_lists(dev_priv, ring); 1536 1537 /* Move everything out of the GPU domains to ensure we do any 1538 * necessary invalidation upon reuse. 
1539 */ 1540 list_for_each_entry(obj, 1541 &dev_priv->mm.inactive_list, 1542 mm_list) 1543 { 1544 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 1545 } 1546 1547 /* The fence registers are invalidated so clear them out */ 1548 i915_gem_reset_fences(dev); 1549 } 1550 1551 /** 1552 * This function clears the request list as sequence numbers are passed. 1553 */ 1554 void 1555 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) 1556 { 1557 uint32_t seqno; 1558 1559 if (list_empty(&ring->request_list)) 1560 return; 1561 1562 WARN_ON(i915_verify_lists(ring->dev)); 1563 1564 seqno = ring->get_seqno(ring, true); 1565 1566 while (!list_empty(&ring->request_list)) { 1567 struct drm_i915_gem_request *request; 1568 1569 request = list_first_entry(&ring->request_list, 1570 struct drm_i915_gem_request, 1571 list); 1572 1573 if (!i915_seqno_passed(seqno, request->seqno)) 1574 break; 1575 1576 /* We know the GPU must have read the request to have 1577 * sent us the seqno + interrupt, so use the position 1578 * of tail of the request to update the last known position 1579 * of the GPU head. 1580 */ 1581 ring->last_retired_head = request->tail; 1582 1583 list_del(&request->list); 1584 i915_gem_request_remove_from_client(request); 1585 kfree(request, M_DRM); 1586 } 1587 1588 /* Move any buffers on the active list that are no longer referenced 1589 * by the ringbuffer to the flushing/inactive lists as appropriate. 1590 */ 1591 while (!list_empty(&ring->active_list)) { 1592 struct drm_i915_gem_object *obj; 1593 1594 obj = list_first_entry(&ring->active_list, 1595 struct drm_i915_gem_object, 1596 ring_list); 1597 1598 if (!i915_seqno_passed(seqno, obj->last_read_seqno)) 1599 break; 1600 1601 i915_gem_object_move_to_inactive(obj); 1602 } 1603 1604 if (unlikely(ring->trace_irq_seqno && 1605 i915_seqno_passed(seqno, ring->trace_irq_seqno))) { 1606 ring->irq_put(ring); 1607 ring->trace_irq_seqno = 0; 1608 } 1609 1610 } 1611 1612 void 1613 i915_gem_retire_requests(struct drm_device *dev) 1614 { 1615 drm_i915_private_t *dev_priv = dev->dev_private; 1616 struct intel_ring_buffer *ring; 1617 int i; 1618 1619 for_each_ring(ring, dev_priv, i) 1620 i915_gem_retire_requests_ring(ring); 1621 } 1622 1623 static void 1624 i915_gem_retire_work_handler(struct work_struct *work) 1625 { 1626 drm_i915_private_t *dev_priv; 1627 struct drm_device *dev; 1628 struct intel_ring_buffer *ring; 1629 bool idle; 1630 int i; 1631 1632 dev_priv = container_of(work, drm_i915_private_t, 1633 mm.retire_work.work); 1634 dev = dev_priv->dev; 1635 1636 /* Come back later if the device is busy... */ 1637 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) { 1638 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 1639 round_jiffies_up_relative(hz)); 1640 return; 1641 } 1642 1643 i915_gem_retire_requests(dev); 1644 1645 /* Send a periodic flush down the ring so we don't hold onto GEM 1646 * objects indefinitely. 1647 */ 1648 idle = true; 1649 for_each_ring(ring, dev_priv, i) { 1650 if (ring->gpu_caches_dirty) 1651 i915_add_request(ring, NULL, NULL); 1652 1653 idle &= list_empty(&ring->request_list); 1654 } 1655 1656 if (!dev_priv->mm.suspended && !idle) 1657 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 1658 round_jiffies_up_relative(hz)); 1659 if (idle) 1660 intel_mark_idle(dev); 1661 1662 DRM_UNLOCK(dev); 1663 } 1664 /** 1665 * Ensures that an object will eventually get non-busy by flushing any required 1666 * write domains, emitting any outstanding lazy request and retiring and 1667 * completed requests. 
1668 */ 1669 static int 1670 i915_gem_object_flush_active(struct drm_i915_gem_object *obj) 1671 { 1672 int ret; 1673 1674 if (obj->active) { 1675 ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); 1676 if (ret) 1677 return ret; 1678 1679 i915_gem_retire_requests_ring(obj->ring); 1680 } 1681 1682 return 0; 1683 } 1684 1685 /** 1686 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 1687 * @DRM_IOCTL_ARGS: standard ioctl arguments 1688 * 1689 * Returns 0 if successful, else an error is returned with the remaining time in 1690 * the timeout parameter. 1691 * -ETIME: object is still busy after timeout 1692 * -ERESTARTSYS: signal interrupted the wait 1693 * -ENONENT: object doesn't exist 1694 * Also possible, but rare: 1695 * -EAGAIN: GPU wedged 1696 * -ENOMEM: damn 1697 * -ENODEV: Internal IRQ fail 1698 * -E?: The add request failed 1699 * 1700 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 1701 * non-zero timeout parameter the wait ioctl will wait for the given number of 1702 * nanoseconds on an object becoming unbusy. Since the wait itself does so 1703 * without holding struct_mutex the object may become re-busied before this 1704 * function completes. A similar but shorter * race condition exists in the busy 1705 * ioctl 1706 */ 1707 int 1708 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 1709 { 1710 struct drm_i915_gem_wait *args = data; 1711 struct drm_i915_gem_object *obj; 1712 struct intel_ring_buffer *ring = NULL; 1713 struct timespec timeout_stack, *timeout = NULL; 1714 u32 seqno = 0; 1715 int ret = 0; 1716 1717 if (args->timeout_ns >= 0) { 1718 timeout_stack = ns_to_timespec(args->timeout_ns); 1719 timeout = &timeout_stack; 1720 } 1721 1722 ret = i915_mutex_lock_interruptible(dev); 1723 if (ret) 1724 return ret; 1725 1726 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle)); 1727 if (&obj->base == NULL) { 1728 DRM_UNLOCK(dev); 1729 return -ENOENT; 1730 } 1731 1732 /* Need to make sure the object gets inactive eventually. */ 1733 ret = i915_gem_object_flush_active(obj); 1734 if (ret) 1735 goto out; 1736 1737 if (obj->active) { 1738 seqno = obj->last_read_seqno; 1739 ring = obj->ring; 1740 } 1741 1742 if (seqno == 0) 1743 goto out; 1744 1745 /* Do this after OLR check to make sure we make forward progress polling 1746 * on this IOCTL with a 0 timeout (like busy ioctl) 1747 */ 1748 if (!args->timeout_ns) { 1749 ret = -ETIMEDOUT; 1750 goto out; 1751 } 1752 1753 drm_gem_object_unreference(&obj->base); 1754 DRM_UNLOCK(dev); 1755 1756 ret = __wait_seqno(ring, seqno, true, timeout); 1757 if (timeout) { 1758 WARN_ON(!timespec_valid(timeout)); 1759 args->timeout_ns = timespec_to_ns(timeout); 1760 } 1761 return ret; 1762 1763 out: 1764 drm_gem_object_unreference(&obj->base); 1765 DRM_UNLOCK(dev); 1766 return ret; 1767 } 1768 1769 /** 1770 * i915_gem_object_sync - sync an object to a ring. 1771 * 1772 * @obj: object which may be in use on another ring. 1773 * @to: ring we wish to use the object on. May be NULL. 1774 * 1775 * This code is meant to abstract object synchronization with the GPU. 1776 * Calling with NULL implies synchronizing the object with the CPU 1777 * rather than a particular GPU ring. 1778 * 1779 * Returns 0 if successful, else propagates up the lower layer error. 
1780 */ 1781 int 1782 i915_gem_object_sync(struct drm_i915_gem_object *obj, 1783 struct intel_ring_buffer *to) 1784 { 1785 struct intel_ring_buffer *from = obj->ring; 1786 u32 seqno; 1787 int ret, idx; 1788 1789 if (from == NULL || to == from) 1790 return 0; 1791 1792 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) 1793 return i915_gem_object_wait_rendering(obj, false); 1794 1795 idx = intel_ring_sync_index(from, to); 1796 1797 seqno = obj->last_read_seqno; 1798 if (seqno <= from->sync_seqno[idx]) 1799 return 0; 1800 1801 ret = i915_gem_check_olr(obj->ring, seqno); 1802 if (ret) 1803 return ret; 1804 1805 ret = to->sync_to(to, from, seqno); 1806 if (!ret) 1807 /* We use last_read_seqno because sync_to() 1808 * might have just caused seqno wrap under 1809 * the radar. 1810 */ 1811 from->sync_seqno[idx] = obj->last_read_seqno; 1812 1813 return ret; 1814 } 1815 1816 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 1817 { 1818 u32 old_write_domain, old_read_domains; 1819 1820 /* Act a barrier for all accesses through the GTT */ 1821 cpu_mfence(); 1822 1823 /* Force a pagefault for domain tracking on next user access */ 1824 i915_gem_release_mmap(obj); 1825 1826 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) 1827 return; 1828 1829 old_read_domains = obj->base.read_domains; 1830 old_write_domain = obj->base.write_domain; 1831 1832 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; 1833 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; 1834 1835 } 1836 1837 /** 1838 * Unbinds an object from the GTT aperture. 1839 */ 1840 int 1841 i915_gem_object_unbind(struct drm_i915_gem_object *obj) 1842 { 1843 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 1844 int ret = 0; 1845 1846 if (obj->gtt_space == NULL) 1847 return 0; 1848 1849 if (obj->pin_count) 1850 return -EBUSY; 1851 1852 BUG_ON(obj->pages == NULL); 1853 1854 ret = i915_gem_object_finish_gpu(obj); 1855 if (ret) 1856 return ret; 1857 /* Continue on if we fail due to EIO, the GPU is hung so we 1858 * should be safe and we need to cleanup or else we might 1859 * cause memory corruption through use-after-free. 1860 */ 1861 1862 i915_gem_object_finish_gtt(obj); 1863 1864 /* Move the object to the CPU domain to ensure that 1865 * any possible CPU writes while it's not in the GTT 1866 * are flushed when we go to remap it. 1867 */ 1868 if (ret == 0) 1869 ret = i915_gem_object_set_to_cpu_domain(obj, 1); 1870 if (ret == -ERESTARTSYS) 1871 return ret; 1872 if (ret) { 1873 /* In the event of a disaster, abandon all caches and 1874 * hope for the best. 1875 */ 1876 i915_gem_clflush_object(obj); 1877 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; 1878 } 1879 1880 /* release the fence reg _after_ flushing */ 1881 ret = i915_gem_object_put_fence(obj); 1882 if (ret) 1883 return ret; 1884 1885 if (obj->has_global_gtt_mapping) 1886 i915_gem_gtt_unbind_object(obj); 1887 if (obj->has_aliasing_ppgtt_mapping) { 1888 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 1889 obj->has_aliasing_ppgtt_mapping = 0; 1890 } 1891 i915_gem_gtt_finish_object(obj); 1892 1893 i915_gem_object_put_pages_gtt(obj); 1894 1895 list_del_init(&obj->gtt_list); 1896 list_del_init(&obj->mm_list); 1897 /* Avoid an unnecessary call to unbind on rebind. 
*/ 1898 obj->map_and_fenceable = true; 1899 1900 drm_mm_put_block(obj->gtt_space); 1901 obj->gtt_space = NULL; 1902 obj->gtt_offset = 0; 1903 1904 if (i915_gem_object_is_purgeable(obj)) 1905 i915_gem_object_truncate(obj); 1906 1907 return ret; 1908 } 1909 1910 int i915_gpu_idle(struct drm_device *dev) 1911 { 1912 drm_i915_private_t *dev_priv = dev->dev_private; 1913 struct intel_ring_buffer *ring; 1914 int ret, i; 1915 1916 /* Flush everything onto the inactive list. */ 1917 for_each_ring(ring, dev_priv, i) { 1918 ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); 1919 if (ret) 1920 return ret; 1921 1922 ret = intel_ring_idle(ring); 1923 if (ret) 1924 return ret; 1925 } 1926 1927 return 0; 1928 } 1929 1930 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, 1931 struct drm_i915_gem_object *obj) 1932 { 1933 drm_i915_private_t *dev_priv = dev->dev_private; 1934 uint64_t val; 1935 1936 if (obj) { 1937 u32 size = obj->gtt_space->size; 1938 1939 val = (uint64_t)((obj->gtt_offset + size - 4096) & 1940 0xfffff000) << 32; 1941 val |= obj->gtt_offset & 0xfffff000; 1942 val |= (uint64_t)((obj->stride / 128) - 1) << 1943 SANDYBRIDGE_FENCE_PITCH_SHIFT; 1944 1945 if (obj->tiling_mode == I915_TILING_Y) 1946 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 1947 val |= I965_FENCE_REG_VALID; 1948 } else 1949 val = 0; 1950 1951 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); 1952 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); 1953 } 1954 1955 static void i965_write_fence_reg(struct drm_device *dev, int reg, 1956 struct drm_i915_gem_object *obj) 1957 { 1958 drm_i915_private_t *dev_priv = dev->dev_private; 1959 uint64_t val; 1960 1961 if (obj) { 1962 u32 size = obj->gtt_space->size; 1963 1964 val = (uint64_t)((obj->gtt_offset + size - 4096) & 1965 0xfffff000) << 32; 1966 val |= obj->gtt_offset & 0xfffff000; 1967 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; 1968 if (obj->tiling_mode == I915_TILING_Y) 1969 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 1970 val |= I965_FENCE_REG_VALID; 1971 } else 1972 val = 0; 1973 1974 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); 1975 POSTING_READ(FENCE_REG_965_0 + reg * 8); 1976 } 1977 1978 static void i915_write_fence_reg(struct drm_device *dev, int reg, 1979 struct drm_i915_gem_object *obj) 1980 { 1981 drm_i915_private_t *dev_priv = dev->dev_private; 1982 u32 val; 1983 1984 if (obj) { 1985 u32 size = obj->gtt_space->size; 1986 int pitch_val; 1987 int tile_width; 1988 1989 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || 1990 (size & -size) != size || 1991 (obj->gtt_offset & (size - 1)), 1992 "object 0x%08x [fenceable? 
%d] not 1M or pot-size (0x%08x) aligned\n", 1993 obj->gtt_offset, obj->map_and_fenceable, size); 1994 1995 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 1996 tile_width = 128; 1997 else 1998 tile_width = 512; 1999 2000 /* Note: pitch better be a power of two tile widths */ 2001 pitch_val = obj->stride / tile_width; 2002 pitch_val = ffs(pitch_val) - 1; 2003 2004 val = obj->gtt_offset; 2005 if (obj->tiling_mode == I915_TILING_Y) 2006 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2007 val |= I915_FENCE_SIZE_BITS(size); 2008 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2009 val |= I830_FENCE_REG_VALID; 2010 } else 2011 val = 0; 2012 2013 if (reg < 8) 2014 reg = FENCE_REG_830_0 + reg * 4; 2015 else 2016 reg = FENCE_REG_945_8 + (reg - 8) * 4; 2017 2018 I915_WRITE(reg, val); 2019 POSTING_READ(reg); 2020 } 2021 2022 static void i830_write_fence_reg(struct drm_device *dev, int reg, 2023 struct drm_i915_gem_object *obj) 2024 { 2025 drm_i915_private_t *dev_priv = dev->dev_private; 2026 uint32_t val; 2027 2028 if (obj) { 2029 u32 size = obj->gtt_space->size; 2030 uint32_t pitch_val; 2031 2032 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2033 (size & -size) != size || 2034 (obj->gtt_offset & (size - 1)), 2035 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2036 obj->gtt_offset, size); 2037 2038 pitch_val = obj->stride / 128; 2039 pitch_val = ffs(pitch_val) - 1; 2040 2041 val = obj->gtt_offset; 2042 if (obj->tiling_mode == I915_TILING_Y) 2043 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2044 val |= I830_FENCE_SIZE_BITS(size); 2045 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2046 val |= I830_FENCE_REG_VALID; 2047 } else 2048 val = 0; 2049 2050 I915_WRITE(FENCE_REG_830_0 + reg * 4, val); 2051 POSTING_READ(FENCE_REG_830_0 + reg * 4); 2052 } 2053 2054 static void i915_gem_write_fence(struct drm_device *dev, int reg, 2055 struct drm_i915_gem_object *obj) 2056 { 2057 switch (INTEL_INFO(dev)->gen) { 2058 case 7: 2059 case 6: sandybridge_write_fence_reg(dev, reg, obj); break; 2060 case 5: 2061 case 4: i965_write_fence_reg(dev, reg, obj); break; 2062 case 3: i915_write_fence_reg(dev, reg, obj); break; 2063 case 2: i830_write_fence_reg(dev, reg, obj); break; 2064 default: break; 2065 } 2066 } 2067 2068 static inline int fence_number(struct drm_i915_private *dev_priv, 2069 struct drm_i915_fence_reg *fence) 2070 { 2071 return fence - dev_priv->fence_regs; 2072 } 2073 2074 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 2075 struct drm_i915_fence_reg *fence, 2076 bool enable) 2077 { 2078 struct drm_device *dev = obj->base.dev; 2079 struct drm_i915_private *dev_priv = dev->dev_private; 2080 int fence_reg = fence_number(dev_priv, fence); 2081 2082 /* In order to fully serialize access to the fenced region and 2083 * the update to the fence register we need to take extreme 2084 * measures on SNB+. In theory, the write to the fence register 2085 * flushes all memory transactions before, and coupled with the 2086 * mb() placed around the register write we serialise all memory 2087 * operations with respect to the changes in the tiler. Yet, on 2088 * SNB+ we need to take a step further and emit an explicit wbinvd() 2089 * on each processor in order to manually flush all memory 2090 * transactions before updating the fence register. 2091 */ 2092 if (HAS_LLC(obj->base.dev)) 2093 cpu_wbinvd_on_all_cpus(); 2094 i915_gem_write_fence(dev, fence_reg, enable ? 
obj : NULL); 2095 2096 if (enable) { 2097 obj->fence_reg = fence_reg; 2098 fence->obj = obj; 2099 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); 2100 } else { 2101 obj->fence_reg = I915_FENCE_REG_NONE; 2102 fence->obj = NULL; 2103 list_del_init(&fence->lru_list); 2104 } 2105 } 2106 2107 static int 2108 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) 2109 { 2110 if (obj->last_fenced_seqno) { 2111 int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); 2112 if (ret) 2113 return ret; 2114 2115 obj->last_fenced_seqno = 0; 2116 } 2117 2118 /* Ensure that all CPU reads are completed before installing a fence 2119 * and all writes before removing the fence. 2120 */ 2121 if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) 2122 cpu_mfence(); 2123 2124 obj->fenced_gpu_access = false; 2125 return 0; 2126 } 2127 2128 int 2129 i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2130 { 2131 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2132 int ret; 2133 2134 ret = i915_gem_object_flush_fence(obj); 2135 if (ret) 2136 return ret; 2137 2138 if (obj->fence_reg == I915_FENCE_REG_NONE) 2139 return 0; 2140 2141 i915_gem_object_update_fence(obj, 2142 &dev_priv->fence_regs[obj->fence_reg], 2143 false); 2144 i915_gem_object_fence_lost(obj); 2145 2146 return 0; 2147 } 2148 2149 static struct drm_i915_fence_reg * 2150 i915_find_fence_reg(struct drm_device *dev) 2151 { 2152 struct drm_i915_private *dev_priv = dev->dev_private; 2153 struct drm_i915_fence_reg *reg, *avail; 2154 int i; 2155 2156 /* First try to find a free reg */ 2157 avail = NULL; 2158 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { 2159 reg = &dev_priv->fence_regs[i]; 2160 if (!reg->obj) 2161 return reg; 2162 2163 if (!reg->pin_count) 2164 avail = reg; 2165 } 2166 2167 if (avail == NULL) 2168 return NULL; 2169 2170 /* None available, try to steal one or wait for a user to finish */ 2171 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2172 if (reg->pin_count) 2173 continue; 2174 2175 return reg; 2176 } 2177 2178 return NULL; 2179 } 2180 2181 /** 2182 * i915_gem_object_get_fence - set up fencing for an object 2183 * @obj: object to map through a fence reg 2184 * 2185 * When mapping objects through the GTT, userspace wants to be able to write 2186 * to them without having to worry about swizzling if the object is tiled. 2187 * This function walks the fence regs looking for a free one for @obj, 2188 * stealing one if it can't find any. 2189 * 2190 * It then sets up the reg based on the object's properties: address, pitch 2191 * and tiling format. 2192 * 2193 * For an untiled surface, this removes any existing fence. 2194 */ 2195 int 2196 i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 2197 { 2198 struct drm_device *dev = obj->base.dev; 2199 struct drm_i915_private *dev_priv = dev->dev_private; 2200 bool enable = obj->tiling_mode != I915_TILING_NONE; 2201 struct drm_i915_fence_reg *reg; 2202 int ret; 2203 2204 /* Have we updated the tiling parameters upon the object and so 2205 * will need to serialise the write to the associated fence register? 2206 */ 2207 if (obj->fence_dirty) { 2208 ret = i915_gem_object_flush_fence(obj); 2209 if (ret) 2210 return ret; 2211 } 2212 2213 /* Just update our place in the LRU if our fence is getting reused. 
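 * (If the tiling parameters changed, obj->fence_dirty is set and the code
 * below falls through to rewrite the fence register instead of only bumping
 * the LRU.)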
*/ 2214 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2215 reg = &dev_priv->fence_regs[obj->fence_reg]; 2216 if (!obj->fence_dirty) { 2217 list_move_tail(&reg->lru_list, 2218 &dev_priv->mm.fence_list); 2219 return 0; 2220 } 2221 } else if (enable) { 2222 reg = i915_find_fence_reg(dev); 2223 if (reg == NULL) 2224 return -EDEADLK; 2225 2226 if (reg->obj) { 2227 struct drm_i915_gem_object *old = reg->obj; 2228 2229 ret = i915_gem_object_flush_fence(old); 2230 if (ret) 2231 return ret; 2232 2233 i915_gem_object_fence_lost(old); 2234 } 2235 } else 2236 return 0; 2237 2238 i915_gem_object_update_fence(obj, reg, enable); 2239 obj->fence_dirty = false; 2240 2241 return 0; 2242 } 2243 2244 static bool i915_gem_valid_gtt_space(struct drm_device *dev, 2245 struct drm_mm_node *gtt_space, 2246 unsigned long cache_level) 2247 { 2248 struct drm_mm_node *other; 2249 2250 /* On non-LLC machines we have to be careful when putting differing 2251 * types of snoopable memory together to avoid the prefetcher 2252 * crossing memory domains and dying. 2253 */ 2254 if (HAS_LLC(dev)) 2255 return true; 2256 2257 if (gtt_space == NULL) 2258 return true; 2259 2260 if (list_empty(&gtt_space->node_list)) 2261 return true; 2262 2263 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); 2264 if (other->allocated && !other->hole_follows && other->color != cache_level) 2265 return false; 2266 2267 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); 2268 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) 2269 return false; 2270 2271 return true; 2272 } 2273 2274 static void i915_gem_verify_gtt(struct drm_device *dev) 2275 { 2276 #if WATCH_GTT 2277 struct drm_i915_private *dev_priv = dev->dev_private; 2278 struct drm_i915_gem_object *obj; 2279 int err = 0; 2280 2281 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { 2282 if (obj->gtt_space == NULL) { 2283 printk(KERN_ERR "object found on GTT list with no space reserved\n"); 2284 err++; 2285 continue; 2286 } 2287 2288 if (obj->cache_level != obj->gtt_space->color) { 2289 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n", 2290 obj->gtt_space->start, 2291 obj->gtt_space->start + obj->gtt_space->size, 2292 obj->cache_level, 2293 obj->gtt_space->color); 2294 err++; 2295 continue; 2296 } 2297 2298 if (!i915_gem_valid_gtt_space(dev, 2299 obj->gtt_space, 2300 obj->cache_level)) { 2301 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n", 2302 obj->gtt_space->start, 2303 obj->gtt_space->start + obj->gtt_space->size, 2304 obj->cache_level); 2305 err++; 2306 continue; 2307 } 2308 } 2309 2310 WARN_ON(err); 2311 #endif 2312 } 2313 2314 /** 2315 * Finds free space in the GTT aperture and binds the object there. 
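 *
 * (Aside, added for illustration: the body below follows a "search, evict,
 * retry" loop.  A minimal control-flow sketch of that pattern is given here;
 * the sketch_* helpers are placeholders, not real driver functions.)
 */
#if 0	/* illustrative sketch only, never compiled */
static int sketch_bind_retry(struct drm_device *dev, size_t size)
{
	void *node;
	int ret;

	for (;;) {
		/* Look for a large enough hole in the GTT... */
		node = sketch_search_free_space(dev, size);	/* placeholder */
		if (node != NULL)
			return 0;	/* found space; the caller binds into it */

		/* ...and if nothing fits, evict and retry the search. */
		ret = sketch_evict_something(dev, size);	/* placeholder */
		if (ret)
			return ret;	/* nothing left to evict: give up */
	}
}
#endif
/*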
2316 */ 2317 static int 2318 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 2319 unsigned alignment, 2320 bool map_and_fenceable, 2321 bool nonblocking) 2322 { 2323 struct drm_device *dev = obj->base.dev; 2324 drm_i915_private_t *dev_priv = dev->dev_private; 2325 struct drm_mm_node *free_space; 2326 uint32_t size, fence_size, fence_alignment, unfenced_alignment; 2327 bool mappable, fenceable; 2328 int ret; 2329 2330 if (obj->madv != I915_MADV_WILLNEED) { 2331 DRM_ERROR("Attempting to bind a purgeable object\n"); 2332 return -EINVAL; 2333 } 2334 2335 fence_size = i915_gem_get_gtt_size(dev, obj->base.size, 2336 obj->tiling_mode); 2337 fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size, 2338 obj->tiling_mode); 2339 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev, 2340 obj->base.size, obj->tiling_mode); 2341 if (alignment == 0) 2342 alignment = map_and_fenceable ? fence_alignment : 2343 unfenced_alignment; 2344 if (map_and_fenceable && (alignment & (fence_alignment - 1)) != 0) { 2345 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 2346 return -EINVAL; 2347 } 2348 2349 size = map_and_fenceable ? fence_size : obj->base.size; 2350 2351 /* If the object is bigger than the entire aperture, reject it early 2352 * before evicting everything in a vain attempt to find space. 2353 */ 2354 if (obj->base.size > (map_and_fenceable ? 2355 dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { 2356 DRM_ERROR( 2357 "Attempting to bind an object larger than the aperture\n"); 2358 return -E2BIG; 2359 } 2360 2361 search_free: 2362 if (map_and_fenceable) 2363 free_space = 2364 drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, 2365 size, alignment, obj->cache_level, 2366 0, dev_priv->mm.gtt_mappable_end, 2367 false); 2368 else 2369 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, 2370 size, alignment, obj->cache_level, 2371 false); 2372 2373 if (free_space != NULL) { 2374 if (map_and_fenceable) 2375 obj->gtt_space = 2376 drm_mm_get_block_range_generic(free_space, 2377 size, alignment, obj->cache_level, 2378 0, dev_priv->mm.gtt_mappable_end, 2379 false); 2380 else 2381 obj->gtt_space = 2382 drm_mm_get_block_generic(free_space, 2383 size, alignment, obj->cache_level, 2384 false); 2385 } 2386 if (obj->gtt_space == NULL) { 2387 ret = i915_gem_evict_something(dev, size, alignment, 2388 obj->cache_level, 2389 map_and_fenceable, 2390 nonblocking); 2391 if (ret) 2392 return ret; 2393 2394 goto search_free; 2395 } 2396 2397 /* 2398 * NOTE: i915_gem_object_get_pages_gtt() cannot 2399 * return ENOMEM, since we used VM_ALLOC_RETRY. 
2400 */ 2401 ret = i915_gem_object_get_pages_gtt(obj); 2402 if (ret != 0) { 2403 drm_mm_put_block(obj->gtt_space); 2404 obj->gtt_space = NULL; 2405 return ret; 2406 } 2407 2408 i915_gem_gtt_bind_object(obj, obj->cache_level); 2409 if (ret != 0) { 2410 i915_gem_object_put_pages_gtt(obj); 2411 drm_mm_put_block(obj->gtt_space); 2412 obj->gtt_space = NULL; 2413 if (i915_gem_evict_everything(dev)) 2414 return (ret); 2415 goto search_free; 2416 } 2417 2418 list_add_tail(&obj->gtt_list, &dev_priv->mm.bound_list); 2419 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2420 2421 obj->gtt_offset = obj->gtt_space->start; 2422 2423 fenceable = 2424 obj->gtt_space->size == fence_size && 2425 (obj->gtt_space->start & (fence_alignment - 1)) == 0; 2426 2427 mappable = 2428 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; 2429 obj->map_and_fenceable = mappable && fenceable; 2430 2431 i915_gem_verify_gtt(dev); 2432 return 0; 2433 } 2434 2435 void 2436 i915_gem_clflush_object(struct drm_i915_gem_object *obj) 2437 { 2438 2439 /* If we don't have a page list set up, then we're not pinned 2440 * to GPU, and we can ignore the cache flush because it'll happen 2441 * again at bind time. 2442 */ 2443 if (obj->pages == NULL) 2444 return; 2445 2446 /* If the GPU is snooping the contents of the CPU cache, 2447 * we do not need to manually clear the CPU cache lines. However, 2448 * the caches are only snooped when the render cache is 2449 * flushed/invalidated. As we always have to emit invalidations 2450 * and flushes when moving into and out of the RENDER domain, correct 2451 * snooping behaviour occurs naturally as the result of our domain 2452 * tracking. 2453 */ 2454 if (obj->cache_level != I915_CACHE_NONE) 2455 return; 2456 2457 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); 2458 } 2459 2460 /** Flushes the GTT write domain for the object if it's dirty. */ 2461 static void 2462 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) 2463 { 2464 uint32_t old_write_domain; 2465 2466 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) 2467 return; 2468 2469 /* No actual flushing is required for the GTT write domain. Writes 2470 * to it immediately go to main memory as far as we know, so there's 2471 * no chipset flush. It also doesn't land in render cache. 2472 * 2473 * However, we do have to enforce the order so that all writes through 2474 * the GTT land before any writes to the device, such as updates to 2475 * the GATT itself. 2476 */ 2477 cpu_sfence(); 2478 2479 old_write_domain = obj->base.write_domain; 2480 obj->base.write_domain = 0; 2481 } 2482 2483 /** Flushes the CPU write domain for the object if it's dirty. */ 2484 static void 2485 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) 2486 { 2487 uint32_t old_write_domain; 2488 2489 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) 2490 return; 2491 2492 i915_gem_clflush_object(obj); 2493 i915_gem_chipset_flush(obj->base.dev); 2494 old_write_domain = obj->base.write_domain; 2495 obj->base.write_domain = 0; 2496 } 2497 2498 /** 2499 * Moves a single object to the GTT read, and possibly write domain. 2500 * 2501 * This function returns when the move is complete, including waiting on 2502 * flushes to occur. 
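 *
 * (Aside, added for illustration: the domain update below follows a simple
 * rule -- read_domains is a bit mask, write_domain holds at most one bit.
 * A minimal sketch of that rule, not driver code:)
 */
#if 0	/* illustrative sketch only, never compiled */
static void sketch_mark_gtt_access(uint32_t *read_domains,
				   uint32_t *write_domain, bool write)
{
	/* Reading through the GTT just adds another read domain... */
	*read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		/* ...while writing makes the GTT the only valid domain. */
		*read_domains = I915_GEM_DOMAIN_GTT;
		*write_domain = I915_GEM_DOMAIN_GTT;
	}
}
#endif
/*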
2503 */ 2504 int 2505 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2506 { 2507 drm_i915_private_t *dev_priv = obj->base.dev->dev_private; 2508 uint32_t old_write_domain, old_read_domains; 2509 int ret; 2510 2511 /* Not valid to be called on unbound objects. */ 2512 if (obj->gtt_space == NULL) 2513 return -EINVAL; 2514 2515 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) 2516 return 0; 2517 2518 ret = i915_gem_object_wait_rendering(obj, !write); 2519 if (ret) 2520 return ret; 2521 2522 i915_gem_object_flush_cpu_write_domain(obj); 2523 2524 old_write_domain = obj->base.write_domain; 2525 old_read_domains = obj->base.read_domains; 2526 2527 /* It should now be out of any other write domains, and we can update 2528 * the domain values for our changes. 2529 */ 2530 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 2531 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2532 if (write) { 2533 obj->base.read_domains = I915_GEM_DOMAIN_GTT; 2534 obj->base.write_domain = I915_GEM_DOMAIN_GTT; 2535 obj->dirty = 1; 2536 } 2537 2538 /* And bump the LRU for this access */ 2539 if (i915_gem_object_is_inactive(obj)) 2540 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2541 2542 return 0; 2543 } 2544 2545 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 2546 enum i915_cache_level cache_level) 2547 { 2548 struct drm_device *dev = obj->base.dev; 2549 drm_i915_private_t *dev_priv = dev->dev_private; 2550 int ret; 2551 2552 if (obj->cache_level == cache_level) 2553 return 0; 2554 2555 if (obj->pin_count) { 2556 DRM_DEBUG("can not change the cache level of pinned objects\n"); 2557 return -EBUSY; 2558 } 2559 2560 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) { 2561 ret = i915_gem_object_unbind(obj); 2562 if (ret) 2563 return ret; 2564 } 2565 2566 if (obj->gtt_space) { 2567 ret = i915_gem_object_finish_gpu(obj); 2568 if (ret) 2569 return ret; 2570 2571 i915_gem_object_finish_gtt(obj); 2572 2573 /* Before SandyBridge, you could not use tiling or fence 2574 * registers with snooped memory, so relinquish any fences 2575 * currently pointing to our region in the aperture. 2576 */ 2577 if (INTEL_INFO(dev)->gen < 6) { 2578 ret = i915_gem_object_put_fence(obj); 2579 if (ret) 2580 return ret; 2581 } 2582 2583 if (obj->has_global_gtt_mapping) 2584 i915_gem_gtt_bind_object(obj, cache_level); 2585 if (obj->has_aliasing_ppgtt_mapping) 2586 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2587 obj, cache_level); 2588 2589 obj->gtt_space->color = cache_level; 2590 } 2591 2592 if (cache_level == I915_CACHE_NONE) { 2593 u32 old_read_domains, old_write_domain; 2594 2595 /* If we're coming from LLC cached, then we haven't 2596 * actually been tracking whether the data is in the 2597 * CPU cache or not, since we only allow one bit set 2598 * in obj->write_domain and have been skipping the clflushes. 2599 * Just set it to the CPU cache for now. 
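 * The WARN_ONs just below assert that at this point nothing outside the
 * CPU domain is set on the object.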
2600 */ 2601 WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU); 2602 WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU); 2603 2604 old_read_domains = obj->base.read_domains; 2605 old_write_domain = obj->base.write_domain; 2606 2607 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2608 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2609 2610 } 2611 2612 obj->cache_level = cache_level; 2613 i915_gem_verify_gtt(dev); 2614 return 0; 2615 } 2616 2617 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 2618 struct drm_file *file) 2619 { 2620 struct drm_i915_gem_caching *args = data; 2621 struct drm_i915_gem_object *obj; 2622 int ret; 2623 2624 ret = i915_mutex_lock_interruptible(dev); 2625 if (ret) 2626 return ret; 2627 2628 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2629 if (&obj->base == NULL) { 2630 ret = -ENOENT; 2631 goto unlock; 2632 } 2633 2634 args->caching = obj->cache_level != I915_CACHE_NONE; 2635 2636 drm_gem_object_unreference(&obj->base); 2637 unlock: 2638 DRM_UNLOCK(dev); 2639 return ret; 2640 } 2641 2642 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 2643 struct drm_file *file) 2644 { 2645 struct drm_i915_gem_caching *args = data; 2646 struct drm_i915_gem_object *obj; 2647 enum i915_cache_level level; 2648 int ret; 2649 2650 switch (args->caching) { 2651 case I915_CACHING_NONE: 2652 level = I915_CACHE_NONE; 2653 break; 2654 case I915_CACHING_CACHED: 2655 level = I915_CACHE_LLC; 2656 break; 2657 default: 2658 return -EINVAL; 2659 } 2660 2661 ret = i915_mutex_lock_interruptible(dev); 2662 if (ret) 2663 return ret; 2664 2665 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2666 if (&obj->base == NULL) { 2667 ret = -ENOENT; 2668 goto unlock; 2669 } 2670 2671 ret = i915_gem_object_set_cache_level(obj, level); 2672 2673 drm_gem_object_unreference(&obj->base); 2674 unlock: 2675 DRM_UNLOCK(dev); 2676 return ret; 2677 } 2678 2679 /* 2680 * Prepare buffer for display plane (scanout, cursors, etc). 2681 * Can be called from an uninterruptible phase (modesetting) and allows 2682 * any flushes to be pipelined (for pageflips). 2683 */ 2684 int 2685 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 2686 u32 alignment, 2687 struct intel_ring_buffer *pipelined) 2688 { 2689 u32 old_read_domains, old_write_domain; 2690 int ret; 2691 2692 if (pipelined != obj->ring) { 2693 ret = i915_gem_object_sync(obj, pipelined); 2694 if (ret) 2695 return ret; 2696 } 2697 2698 /* The display engine is not coherent with the LLC cache on gen6. As 2699 * a result, we make sure that the pinning that is about to occur is 2700 * done with uncached PTEs. This is lowest common denominator for all 2701 * chipsets. 2702 * 2703 * However for gen6+, we could do better by using the GFDT bit instead 2704 * of uncaching, which would allow us to flush all the LLC-cached data 2705 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 2706 */ 2707 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); 2708 if (ret) 2709 return ret; 2710 2711 /* As the user may map the buffer once pinned in the display plane 2712 * (e.g. libkms for the bootup splash), we have to ensure that we 2713 * always use map_and_fenceable for all scanout buffers. 
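 * Hence the pin below passes map_and_fenceable = true (and
 * nonblocking = false).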
2714 */ 2715 ret = i915_gem_object_pin(obj, alignment, true, false); 2716 if (ret) 2717 return ret; 2718 2719 i915_gem_object_flush_cpu_write_domain(obj); 2720 2721 old_write_domain = obj->base.write_domain; 2722 old_read_domains = obj->base.read_domains; 2723 2724 /* It should now be out of any other write domains, and we can update 2725 * the domain values for our changes. 2726 */ 2727 obj->base.write_domain = 0; 2728 obj->base.read_domains |= I915_GEM_DOMAIN_GTT; 2729 2730 return 0; 2731 } 2732 2733 int 2734 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) 2735 { 2736 int ret; 2737 2738 if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) 2739 return 0; 2740 2741 ret = i915_gem_object_wait_rendering(obj, false); 2742 if (ret) 2743 return ret; 2744 2745 /* Ensure that we invalidate the GPU's caches and TLBs. */ 2746 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; 2747 return 0; 2748 } 2749 2750 /** 2751 * Moves a single object to the CPU read, and possibly write domain. 2752 * 2753 * This function returns when the move is complete, including waiting on 2754 * flushes to occur. 2755 */ 2756 int 2757 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 2758 { 2759 uint32_t old_write_domain, old_read_domains; 2760 int ret; 2761 2762 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) 2763 return 0; 2764 2765 ret = i915_gem_object_wait_rendering(obj, !write); 2766 if (ret) 2767 return ret; 2768 2769 i915_gem_object_flush_gtt_write_domain(obj); 2770 2771 old_write_domain = obj->base.write_domain; 2772 old_read_domains = obj->base.read_domains; 2773 2774 /* Flush the CPU cache if it's still invalid. */ 2775 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { 2776 i915_gem_clflush_object(obj); 2777 2778 obj->base.read_domains |= I915_GEM_DOMAIN_CPU; 2779 } 2780 2781 /* It should now be out of any other write domains, and we can update 2782 * the domain values for our changes. 2783 */ 2784 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); 2785 2786 /* If we're writing through the CPU, then the GPU read domains will 2787 * need to be invalidated at next use. 2788 */ 2789 if (write) { 2790 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 2791 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 2792 } 2793 2794 return 0; 2795 } 2796 2797 /* Throttle our rendering by waiting until the ring has completed our requests 2798 * emitted over 20 msec ago. 2799 * 2800 * Note that if we were to use the current jiffies each time around the loop, 2801 * we wouldn't escape the function with any frames outstanding if the time to 2802 * render a frame was over 20ms. 2803 * 2804 * This should get us reasonable parallelism between CPU and GPU but also 2805 * relatively low latency when blocking on a particular request to finish. 
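 *
 * (Aside, added for illustration: the 20ms window test used in the loop
 * below, shown in isolation.  Not driver code.)
 */
#if 0	/* illustrative sketch only, never compiled */
static bool sketch_request_is_old_enough(unsigned long emitted_jiffies)
{
	/* Anything emitted within the last 20ms is left alone. */
	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);

	return !time_after_eq(emitted_jiffies, recent_enough);
}
#endif
/*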
2806 */ 2807 static int 2808 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 2809 { 2810 struct drm_i915_private *dev_priv = dev->dev_private; 2811 struct drm_i915_file_private *file_priv = file->driver_priv; 2812 unsigned long recent_enough = jiffies - msecs_to_jiffies(20); 2813 struct drm_i915_gem_request *request; 2814 struct intel_ring_buffer *ring = NULL; 2815 u32 seqno = 0; 2816 int ret; 2817 2818 if (atomic_read(&dev_priv->mm.wedged)) 2819 return -EIO; 2820 2821 spin_lock(&file_priv->mm.lock); 2822 list_for_each_entry(request, &file_priv->mm.request_list, client_list) { 2823 if (time_after_eq(request->emitted_jiffies, recent_enough)) 2824 break; 2825 2826 ring = request->ring; 2827 seqno = request->seqno; 2828 } 2829 spin_unlock(&file_priv->mm.lock); 2830 2831 if (seqno == 0) 2832 return 0; 2833 2834 ret = __wait_seqno(ring, seqno, true, NULL); 2835 2836 if (ret == 0) 2837 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 2838 2839 return ret; 2840 } 2841 2842 int 2843 i915_gem_object_pin(struct drm_i915_gem_object *obj, 2844 uint32_t alignment, 2845 bool map_and_fenceable, 2846 bool nonblocking) 2847 { 2848 int ret; 2849 2850 if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 2851 return -EBUSY; 2852 2853 if (obj->gtt_space != NULL) { 2854 if ((alignment && obj->gtt_offset & (alignment - 1)) || 2855 (map_and_fenceable && !obj->map_and_fenceable)) { 2856 WARN(obj->pin_count, 2857 "bo is already pinned with incorrect alignment:" 2858 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," 2859 " obj->map_and_fenceable=%d\n", 2860 obj->gtt_offset, alignment, 2861 map_and_fenceable, 2862 obj->map_and_fenceable); 2863 ret = i915_gem_object_unbind(obj); 2864 if (ret) 2865 return ret; 2866 } 2867 } 2868 2869 if (obj->gtt_space == NULL) { 2870 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2871 2872 ret = i915_gem_object_bind_to_gtt(obj, alignment, 2873 map_and_fenceable, 2874 nonblocking); 2875 if (ret) 2876 return ret; 2877 2878 if (!dev_priv->mm.aliasing_ppgtt) 2879 i915_gem_gtt_bind_object(obj, obj->cache_level); 2880 } 2881 2882 if (!obj->has_global_gtt_mapping && map_and_fenceable) 2883 i915_gem_gtt_bind_object(obj, obj->cache_level); 2884 2885 obj->pin_count++; 2886 obj->pin_mappable |= map_and_fenceable; 2887 2888 return 0; 2889 } 2890 2891 void 2892 i915_gem_object_unpin(struct drm_i915_gem_object *obj) 2893 { 2894 BUG_ON(obj->pin_count == 0); 2895 BUG_ON(obj->gtt_space == NULL); 2896 2897 if (--obj->pin_count == 0) 2898 obj->pin_mappable = false; 2899 } 2900 2901 int 2902 i915_gem_pin_ioctl(struct drm_device *dev, void *data, 2903 struct drm_file *file) 2904 { 2905 struct drm_i915_gem_pin *args = data; 2906 struct drm_i915_gem_object *obj; 2907 int ret; 2908 2909 ret = i915_mutex_lock_interruptible(dev); 2910 if (ret) 2911 return ret; 2912 2913 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2914 if (&obj->base == NULL) { 2915 ret = -ENOENT; 2916 goto unlock; 2917 } 2918 2919 if (obj->madv != I915_MADV_WILLNEED) { 2920 DRM_ERROR("Attempting to pin a purgeable buffer\n"); 2921 ret = -EINVAL; 2922 goto out; 2923 } 2924 2925 if (obj->pin_filp != NULL && obj->pin_filp != file) { 2926 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", 2927 args->handle); 2928 ret = -EINVAL; 2929 goto out; 2930 } 2931 2932 if (obj->user_pin_count == 0) { 2933 ret = i915_gem_object_pin(obj, args->alignment, true, false); 2934 if (ret) 2935 goto out; 2936 } 2937 2938 obj->user_pin_count++; 2939 obj->pin_filp = 
file; 2940 2941 /* XXX - flush the CPU caches for pinned objects 2942 * as the X server doesn't manage domains yet 2943 */ 2944 i915_gem_object_flush_cpu_write_domain(obj); 2945 args->offset = obj->gtt_offset; 2946 out: 2947 drm_gem_object_unreference(&obj->base); 2948 unlock: 2949 DRM_UNLOCK(dev); 2950 return ret; 2951 } 2952 2953 int 2954 i915_gem_unpin_ioctl(struct drm_device *dev, void *data, 2955 struct drm_file *file) 2956 { 2957 struct drm_i915_gem_pin *args = data; 2958 struct drm_i915_gem_object *obj; 2959 int ret; 2960 2961 ret = i915_mutex_lock_interruptible(dev); 2962 if (ret) 2963 return ret; 2964 2965 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 2966 if (&obj->base == NULL) { 2967 ret = -ENOENT; 2968 goto unlock; 2969 } 2970 2971 if (obj->pin_filp != file) { 2972 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", 2973 args->handle); 2974 ret = -EINVAL; 2975 goto out; 2976 } 2977 obj->user_pin_count--; 2978 if (obj->user_pin_count == 0) { 2979 obj->pin_filp = NULL; 2980 i915_gem_object_unpin(obj); 2981 } 2982 2983 out: 2984 drm_gem_object_unreference(&obj->base); 2985 unlock: 2986 DRM_UNLOCK(dev); 2987 return ret; 2988 } 2989 2990 int 2991 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 2992 struct drm_file *file) 2993 { 2994 struct drm_i915_gem_busy *args = data; 2995 struct drm_i915_gem_object *obj; 2996 int ret; 2997 2998 ret = i915_mutex_lock_interruptible(dev); 2999 if (ret) 3000 return ret; 3001 3002 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 3003 if (&obj->base == NULL) { 3004 ret = -ENOENT; 3005 goto unlock; 3006 } 3007 3008 /* Count all active objects as busy, even if they are currently not used 3009 * by the gpu. Users of this interface expect objects to eventually 3010 * become non-busy without any further actions, therefore emit any 3011 * necessary flushes here. 
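 * The low bit of args->busy reports whether the object is still active;
 * the upper 16 bits encode which ring last touched it
 * (intel_ring_flag() << 16).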
3012 */ 3013 ret = i915_gem_object_flush_active(obj); 3014 3015 args->busy = obj->active; 3016 if (obj->ring) { 3017 args->busy |= intel_ring_flag(obj->ring) << 16; 3018 } 3019 3020 drm_gem_object_unreference(&obj->base); 3021 unlock: 3022 DRM_UNLOCK(dev); 3023 return ret; 3024 } 3025 3026 int 3027 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 3028 struct drm_file *file_priv) 3029 { 3030 return i915_gem_ring_throttle(dev, file_priv); 3031 } 3032 3033 int 3034 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 3035 struct drm_file *file_priv) 3036 { 3037 struct drm_i915_gem_madvise *args = data; 3038 struct drm_i915_gem_object *obj; 3039 int ret; 3040 3041 switch (args->madv) { 3042 case I915_MADV_DONTNEED: 3043 case I915_MADV_WILLNEED: 3044 break; 3045 default: 3046 return -EINVAL; 3047 } 3048 3049 ret = i915_mutex_lock_interruptible(dev); 3050 if (ret) 3051 return ret; 3052 3053 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); 3054 if (&obj->base == NULL) { 3055 ret = -ENOENT; 3056 goto unlock; 3057 } 3058 3059 if (obj->pin_count) { 3060 ret = -EINVAL; 3061 goto out; 3062 } 3063 3064 if (obj->madv != __I915_MADV_PURGED) 3065 obj->madv = args->madv; 3066 3067 /* if the object is no longer attached, discard its backing storage */ 3068 if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL) 3069 i915_gem_object_truncate(obj); 3070 3071 args->retained = obj->madv != __I915_MADV_PURGED; 3072 3073 out: 3074 drm_gem_object_unreference(&obj->base); 3075 unlock: 3076 DRM_UNLOCK(dev); 3077 return ret; 3078 } 3079 3080 void i915_gem_object_init(struct drm_i915_gem_object *obj, 3081 const struct drm_i915_gem_object_ops *ops) 3082 { 3083 INIT_LIST_HEAD(&obj->mm_list); 3084 INIT_LIST_HEAD(&obj->gtt_list); 3085 INIT_LIST_HEAD(&obj->ring_list); 3086 INIT_LIST_HEAD(&obj->exec_list); 3087 3088 obj->ops = ops; 3089 3090 obj->fence_reg = I915_FENCE_REG_NONE; 3091 obj->madv = I915_MADV_WILLNEED; 3092 /* Avoid an unnecessary call to unbind on the first bind. */ 3093 obj->map_and_fenceable = true; 3094 3095 i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); 3096 } 3097 3098 static const struct drm_i915_gem_object_ops i915_gem_object_ops = { 3099 .get_pages = i915_gem_object_get_pages_gtt, 3100 .put_pages = i915_gem_object_put_pages_gtt, 3101 }; 3102 3103 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, 3104 size_t size) 3105 { 3106 struct drm_i915_gem_object *obj; 3107 #if 0 3108 struct address_space *mapping; 3109 u32 mask; 3110 #endif 3111 3112 obj = kmalloc(sizeof(*obj), M_DRM, M_WAITOK | M_ZERO); 3113 if (obj == NULL) 3114 return NULL; 3115 3116 if (drm_gem_object_init(dev, &obj->base, size) != 0) { 3117 kfree(obj, M_DRM); 3118 return NULL; 3119 } 3120 3121 #if 0 3122 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 3123 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { 3124 /* 965gm cannot relocate objects above 4GiB. */ 3125 mask &= ~__GFP_HIGHMEM; 3126 mask |= __GFP_DMA32; 3127 } 3128 3129 mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 3130 mapping_set_gfp_mask(mapping, mask); 3131 #endif 3132 3133 i915_gem_object_init(obj, &i915_gem_object_ops); 3134 3135 obj->base.write_domain = I915_GEM_DOMAIN_CPU; 3136 obj->base.read_domains = I915_GEM_DOMAIN_CPU; 3137 3138 if (HAS_LLC(dev)) { 3139 /* On some devices, we can have the GPU use the LLC (the CPU 3140 * cache) for about a 10% performance improvement 3141 * compared to uncached. 
Graphics requests other than 3142 * display scanout are coherent with the CPU in 3143 * accessing this cache. This means in this mode we 3144 * don't need to clflush on the CPU side, and on the 3145 * GPU side we only need to flush internal caches to 3146 * get data visible to the CPU. 3147 * 3148 * However, we maintain the display planes as UC, and so 3149 * need to rebind when first used as such. 3150 */ 3151 obj->cache_level = I915_CACHE_LLC; 3152 } else 3153 obj->cache_level = I915_CACHE_NONE; 3154 3155 return obj; 3156 } 3157 3158 int i915_gem_init_object(struct drm_gem_object *obj) 3159 { 3160 BUG(); 3161 3162 return 0; 3163 } 3164 3165 void i915_gem_free_object(struct drm_gem_object *gem_obj) 3166 { 3167 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 3168 struct drm_device *dev = obj->base.dev; 3169 drm_i915_private_t *dev_priv = dev->dev_private; 3170 3171 if (obj->phys_obj) 3172 i915_gem_detach_phys_object(dev, obj); 3173 3174 obj->pin_count = 0; 3175 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { 3176 bool was_interruptible; 3177 3178 was_interruptible = dev_priv->mm.interruptible; 3179 dev_priv->mm.interruptible = false; 3180 3181 WARN_ON(i915_gem_object_unbind(obj)); 3182 3183 dev_priv->mm.interruptible = was_interruptible; 3184 } 3185 3186 drm_gem_free_mmap_offset(&obj->base); 3187 3188 drm_gem_object_release(&obj->base); 3189 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3190 3191 drm_free(obj->bit_17, M_DRM); 3192 drm_free(obj, M_DRM); 3193 } 3194 3195 int 3196 i915_gem_idle(struct drm_device *dev) 3197 { 3198 drm_i915_private_t *dev_priv = dev->dev_private; 3199 int ret; 3200 3201 DRM_LOCK(dev); 3202 3203 if (dev_priv->mm.suspended) { 3204 DRM_UNLOCK(dev); 3205 return 0; 3206 } 3207 3208 ret = i915_gpu_idle(dev); 3209 if (ret) { 3210 DRM_UNLOCK(dev); 3211 return ret; 3212 } 3213 i915_gem_retire_requests(dev); 3214 3215 /* Under UMS, be paranoid and evict. */ 3216 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3217 i915_gem_evict_everything(dev); 3218 3219 i915_gem_reset_fences(dev); 3220 3221 /* Hack! Don't let anybody do execbuf while we don't control the chip. 3222 * We need to replace this with a semaphore, or something. 3223 * And not confound mm.suspended! 3224 */ 3225 dev_priv->mm.suspended = 1; 3226 del_timer_sync(&dev_priv->hangcheck_timer); 3227 3228 i915_kernel_lost_context(dev); 3229 i915_gem_cleanup_ringbuffer(dev); 3230 3231 DRM_UNLOCK(dev); 3232 3233 /* Cancel the retire work handler, which should be idle now. 
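 * Note that this is a synchronous cancel and runs after the DRM lock has
 * been released above.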
*/ 3234 cancel_delayed_work_sync(&dev_priv->mm.retire_work); 3235 3236 return 0; 3237 } 3238 3239 void i915_gem_l3_remap(struct drm_device *dev) 3240 { 3241 drm_i915_private_t *dev_priv = dev->dev_private; 3242 u32 misccpctl; 3243 int i; 3244 3245 if (!HAS_L3_GPU_CACHE(dev)) 3246 return; 3247 3248 if (!dev_priv->l3_parity.remap_info) 3249 return; 3250 3251 misccpctl = I915_READ(GEN7_MISCCPCTL); 3252 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 3253 POSTING_READ(GEN7_MISCCPCTL); 3254 3255 for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) { 3256 u32 remap = I915_READ(GEN7_L3LOG_BASE + i); 3257 if (remap && remap != dev_priv->l3_parity.remap_info[i/4]) 3258 DRM_DEBUG("0x%x was already programmed to %x\n", 3259 GEN7_L3LOG_BASE + i, remap); 3260 if (remap && !dev_priv->l3_parity.remap_info[i/4]) 3261 DRM_DEBUG_DRIVER("Clearing remapped register\n"); 3262 I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]); 3263 } 3264 3265 /* Make sure all the writes land before disabling dop clock gating */ 3266 POSTING_READ(GEN7_L3LOG_BASE); 3267 3268 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 3269 } 3270 3271 void i915_gem_init_swizzling(struct drm_device *dev) 3272 { 3273 drm_i915_private_t *dev_priv = dev->dev_private; 3274 3275 if (INTEL_INFO(dev)->gen < 5 || 3276 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 3277 return; 3278 3279 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 3280 DISP_TILE_SURFACE_SWIZZLING); 3281 3282 if (IS_GEN5(dev)) 3283 return; 3284 3285 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3286 if (IS_GEN6(dev)) 3287 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3288 else 3289 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3290 } 3291 3292 static bool 3293 intel_enable_blt(struct drm_device *dev) 3294 { 3295 int revision; 3296 3297 if (!HAS_BLT(dev)) 3298 return false; 3299 3300 /* The blitter was dysfunctional on early prototypes */ 3301 revision = pci_read_config(dev->dev, PCIR_REVID, 1); 3302 if (IS_GEN6(dev) && revision < 8) { 3303 DRM_INFO("BLT not supported on this pre-production hardware;" 3304 " graphics performance will be degraded.\n"); 3305 return false; 3306 } 3307 3308 return true; 3309 } 3310 3311 int 3312 i915_gem_init_hw(struct drm_device *dev) 3313 { 3314 drm_i915_private_t *dev_priv = dev->dev_private; 3315 int ret; 3316 3317 if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) 3318 I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000); 3319 3320 i915_gem_l3_remap(dev); 3321 3322 i915_gem_init_swizzling(dev); 3323 3324 ret = intel_init_render_ring_buffer(dev); 3325 if (ret) 3326 return ret; 3327 3328 if (HAS_BSD(dev)) { 3329 ret = intel_init_bsd_ring_buffer(dev); 3330 if (ret) 3331 goto cleanup_render_ring; 3332 } 3333 3334 if (intel_enable_blt(dev)) { 3335 ret = intel_init_blt_ring_buffer(dev); 3336 if (ret) 3337 goto cleanup_bsd_ring; 3338 } 3339 3340 dev_priv->next_seqno = 1; 3341 3342 /* 3343 * XXX: There was some w/a described somewhere suggesting loading 3344 * contexts before PPGTT. 3345 */ 3346 i915_gem_context_init(dev); 3347 i915_gem_init_ppgtt(dev); 3348 3349 return 0; 3350 3351 cleanup_bsd_ring: 3352 intel_cleanup_ring_buffer(&dev_priv->ring[VCS]); 3353 cleanup_render_ring: 3354 intel_cleanup_ring_buffer(&dev_priv->ring[RCS]); 3355 return ret; 3356 } 3357 3358 static bool 3359 intel_enable_ppgtt(struct drm_device *dev) 3360 { 3361 if (i915_enable_ppgtt >= 0) 3362 return i915_enable_ppgtt; 3363 3364 /* Disable ppgtt on SNB if VT-d is on. 
*/ 3365 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled) 3366 return false; 3367 3368 return true; 3369 } 3370 3371 int i915_gem_init(struct drm_device *dev) 3372 { 3373 struct drm_i915_private *dev_priv = dev->dev_private; 3374 unsigned long gtt_size, mappable_size; 3375 int ret; 3376 3377 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; 3378 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; 3379 3380 DRM_LOCK(dev); 3381 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { 3382 /* PPGTT pdes are stolen from global gtt ptes, so shrink the 3383 * aperture accordingly when using aliasing ppgtt. */ 3384 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; 3385 3386 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); 3387 3388 ret = i915_gem_init_aliasing_ppgtt(dev); 3389 if (ret) { 3390 DRM_UNLOCK(dev); 3391 return ret; 3392 } 3393 } else { 3394 /* Let GEM Manage all of the aperture. 3395 * 3396 * However, leave one page at the end still bound to the scratch 3397 * page. There are a number of places where the hardware 3398 * apparently prefetches past the end of the object, and we've 3399 * seen multiple hangs with the GPU head pointer stuck in a 3400 * batchbuffer bound at the last page of the aperture. One page 3401 * should be enough to keep any prefetching inside of the 3402 * aperture. 3403 */ 3404 i915_gem_init_global_gtt(dev, 0, mappable_size, 3405 gtt_size); 3406 } 3407 3408 ret = i915_gem_init_hw(dev); 3409 DRM_UNLOCK(dev); 3410 if (ret) { 3411 i915_gem_cleanup_aliasing_ppgtt(dev); 3412 return ret; 3413 } 3414 3415 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ 3416 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3417 dev_priv->dri1.allow_batchbuffer = 1; 3418 return 0; 3419 } 3420 3421 void 3422 i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3423 { 3424 drm_i915_private_t *dev_priv = dev->dev_private; 3425 struct intel_ring_buffer *ring; 3426 int i; 3427 3428 for_each_ring(ring, dev_priv, i) 3429 intel_cleanup_ring_buffer(ring); 3430 } 3431 3432 int 3433 i915_gem_entervt_ioctl(struct drm_device *dev, void *data, 3434 struct drm_file *file_priv) 3435 { 3436 drm_i915_private_t *dev_priv = dev->dev_private; 3437 int ret; 3438 3439 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3440 return 0; 3441 3442 if (atomic_read(&dev_priv->mm.wedged)) { 3443 DRM_ERROR("Reenabling wedged hardware, good luck\n"); 3444 atomic_set(&dev_priv->mm.wedged, 0); 3445 } 3446 3447 DRM_LOCK(dev); 3448 dev_priv->mm.suspended = 0; 3449 3450 ret = i915_gem_init_hw(dev); 3451 if (ret != 0) { 3452 DRM_UNLOCK(dev); 3453 return ret; 3454 } 3455 3456 KASSERT(list_empty(&dev_priv->mm.active_list), ("active list")); 3457 DRM_UNLOCK(dev); 3458 3459 ret = drm_irq_install(dev); 3460 if (ret) 3461 goto cleanup_ringbuffer; 3462 3463 return 0; 3464 3465 cleanup_ringbuffer: 3466 DRM_LOCK(dev); 3467 i915_gem_cleanup_ringbuffer(dev); 3468 dev_priv->mm.suspended = 1; 3469 DRM_UNLOCK(dev); 3470 3471 return ret; 3472 } 3473 3474 int 3475 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 3476 struct drm_file *file_priv) 3477 { 3478 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3479 return 0; 3480 3481 drm_irq_uninstall(dev); 3482 return i915_gem_idle(dev); 3483 } 3484 3485 void 3486 i915_gem_lastclose(struct drm_device *dev) 3487 { 3488 int ret; 3489 3490 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3491 return; 3492 3493 ret = i915_gem_idle(dev); 3494 if (ret) 3495 DRM_ERROR("failed to idle hardware: %d\n", ret); 3496 } 3497 3498 static 
void 3499 init_ring_lists(struct intel_ring_buffer *ring) 3500 { 3501 INIT_LIST_HEAD(&ring->active_list); 3502 INIT_LIST_HEAD(&ring->request_list); 3503 } 3504 3505 void 3506 i915_gem_load(struct drm_device *dev) 3507 { 3508 int i; 3509 drm_i915_private_t *dev_priv = dev->dev_private; 3510 3511 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3512 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3513 INIT_LIST_HEAD(&dev_priv->mm.unbound_list); 3514 INIT_LIST_HEAD(&dev_priv->mm.bound_list); 3515 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3516 for (i = 0; i < I915_NUM_RINGS; i++) 3517 init_ring_lists(&dev_priv->ring[i]); 3518 for (i = 0; i < I915_MAX_NUM_FENCES; i++) 3519 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); 3520 INIT_DELAYED_WORK(&dev_priv->mm.retire_work, 3521 i915_gem_retire_work_handler); 3522 init_completion(&dev_priv->error_completion); 3523 3524 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3525 if (IS_GEN3(dev)) { 3526 I915_WRITE(MI_ARB_STATE, 3527 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 3528 } 3529 3530 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3531 3532 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3533 if (!drm_core_check_feature(dev, DRIVER_MODESET)) 3534 dev_priv->fence_reg_start = 3; 3535 3536 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) 3537 dev_priv->num_fence_regs = 16; 3538 else 3539 dev_priv->num_fence_regs = 8; 3540 3541 /* Initialize fence registers to zero */ 3542 i915_gem_reset_fences(dev); 3543 3544 i915_gem_detect_bit_6_swizzle(dev); 3545 init_waitqueue_head(&dev_priv->pending_flip_queue); 3546 3547 dev_priv->mm.interruptible = true; 3548 3549 #if 0 3550 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink; 3551 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS; 3552 register_shrinker(&dev_priv->mm.inactive_shrinker); 3553 #else 3554 dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, 3555 i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY); 3556 #endif 3557 } 3558 3559 /* 3560 * Create a physically contiguous memory object for this object 3561 * e.g. 
for cursor + overlay regs 3562 */ 3563 static int i915_gem_init_phys_object(struct drm_device *dev, 3564 int id, int size, int align) 3565 { 3566 drm_i915_private_t *dev_priv = dev->dev_private; 3567 struct drm_i915_gem_phys_object *phys_obj; 3568 int ret; 3569 3570 if (dev_priv->mm.phys_objs[id - 1] || !size) 3571 return 0; 3572 3573 phys_obj = kmalloc(sizeof(struct drm_i915_gem_phys_object), M_DRM, 3574 M_WAITOK | M_ZERO); 3575 if (!phys_obj) 3576 return -ENOMEM; 3577 3578 phys_obj->id = id; 3579 3580 phys_obj->handle = drm_pci_alloc(dev, size, align); 3581 if (!phys_obj->handle) { 3582 ret = -ENOMEM; 3583 goto kfree_obj; 3584 } 3585 pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr, 3586 size / PAGE_SIZE, PAT_WRITE_COMBINING); 3587 3588 dev_priv->mm.phys_objs[id - 1] = phys_obj; 3589 3590 return 0; 3591 3592 kfree_obj: 3593 drm_free(phys_obj, M_DRM); 3594 return ret; 3595 } 3596 3597 static void i915_gem_free_phys_object(struct drm_device *dev, int id) 3598 { 3599 drm_i915_private_t *dev_priv = dev->dev_private; 3600 struct drm_i915_gem_phys_object *phys_obj; 3601 3602 if (!dev_priv->mm.phys_objs[id - 1]) 3603 return; 3604 3605 phys_obj = dev_priv->mm.phys_objs[id - 1]; 3606 if (phys_obj->cur_obj) { 3607 i915_gem_detach_phys_object(dev, phys_obj->cur_obj); 3608 } 3609 3610 drm_pci_free(dev, phys_obj->handle); 3611 drm_free(phys_obj, M_DRM); 3612 dev_priv->mm.phys_objs[id - 1] = NULL; 3613 } 3614 3615 void i915_gem_free_all_phys_object(struct drm_device *dev) 3616 { 3617 int i; 3618 3619 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) 3620 i915_gem_free_phys_object(dev, i); 3621 } 3622 3623 void i915_gem_detach_phys_object(struct drm_device *dev, 3624 struct drm_i915_gem_object *obj) 3625 { 3626 struct vm_object *mapping = obj->base.vm_obj; 3627 char *vaddr; 3628 int i; 3629 int page_count; 3630 3631 if (!obj->phys_obj) 3632 return; 3633 vaddr = obj->phys_obj->handle->vaddr; 3634 3635 page_count = obj->base.size / PAGE_SIZE; 3636 VM_OBJECT_LOCK(obj->base.vm_obj); 3637 for (i = 0; i < page_count; i++) { 3638 struct vm_page *page = shmem_read_mapping_page(mapping, i); 3639 if (!IS_ERR(page)) { 3640 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3641 char *dst = kmap_atomic(page); 3642 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); 3643 kunmap_atomic(dst); 3644 3645 drm_clflush_pages(&page, 1); 3646 3647 #if 0 3648 set_page_dirty(page); 3649 mark_page_accessed(page); 3650 page_cache_release(page); 3651 #endif 3652 VM_OBJECT_LOCK(obj->base.vm_obj); 3653 vm_page_reference(page); 3654 vm_page_dirty(page); 3655 vm_page_busy_wait(page, FALSE, "i915gem"); 3656 vm_page_unwire(page, 0); 3657 vm_page_wakeup(page); 3658 } 3659 } 3660 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3661 intel_gtt_chipset_flush(); 3662 3663 obj->phys_obj->cur_obj = NULL; 3664 obj->phys_obj = NULL; 3665 } 3666 3667 int 3668 i915_gem_attach_phys_object(struct drm_device *dev, 3669 struct drm_i915_gem_object *obj, 3670 int id, 3671 int align) 3672 { 3673 struct vm_object *mapping = obj->base.vm_obj; 3674 drm_i915_private_t *dev_priv = dev->dev_private; 3675 int ret = 0; 3676 int page_count; 3677 int i; 3678 3679 if (id > I915_MAX_PHYS_OBJECT) 3680 return -EINVAL; 3681 3682 if (obj->phys_obj) { 3683 if (obj->phys_obj->id == id) 3684 return 0; 3685 i915_gem_detach_phys_object(dev, obj); 3686 } 3687 3688 /* create a new object */ 3689 if (!dev_priv->mm.phys_objs[id - 1]) { 3690 ret = i915_gem_init_phys_object(dev, id, 3691 obj->base.size, align); 3692 if (ret) { 3693 DRM_ERROR("failed to init phys object %d size: %zu\n", 3694 
id, obj->base.size); 3695 return ret; 3696 } 3697 } 3698 3699 /* bind to the object */ 3700 obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; 3701 obj->phys_obj->cur_obj = obj; 3702 3703 page_count = obj->base.size / PAGE_SIZE; 3704 3705 VM_OBJECT_LOCK(obj->base.vm_obj); 3706 for (i = 0; i < page_count; i++) { 3707 struct vm_page *page; 3708 char *dst, *src; 3709 3710 page = shmem_read_mapping_page(mapping, i); 3711 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3712 if (IS_ERR(page)) 3713 return PTR_ERR(page); 3714 3715 src = kmap_atomic(page); 3716 dst = (char*)obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); 3717 memcpy(dst, src, PAGE_SIZE); 3718 kunmap_atomic(src); 3719 3720 #if 0 3721 mark_page_accessed(page); 3722 page_cache_release(page); 3723 #endif 3724 VM_OBJECT_LOCK(obj->base.vm_obj); 3725 vm_page_reference(page); 3726 vm_page_busy_wait(page, FALSE, "i915gem"); 3727 vm_page_unwire(page, 0); 3728 vm_page_wakeup(page); 3729 } 3730 VM_OBJECT_UNLOCK(obj->base.vm_obj); 3731 3732 return 0; 3733 } 3734 3735 static int 3736 i915_gem_phys_pwrite(struct drm_device *dev, 3737 struct drm_i915_gem_object *obj, 3738 struct drm_i915_gem_pwrite *args, 3739 struct drm_file *file_priv) 3740 { 3741 void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; 3742 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr; 3743 3744 if (copyin_nofault(user_data, vaddr, args->size) != 0) { 3745 unsigned long unwritten; 3746 3747 /* The physical object once assigned is fixed for the lifetime 3748 * of the obj, so we can safely drop the lock and continue 3749 * to access vaddr. 3750 */ 3751 DRM_UNLOCK(dev); 3752 unwritten = copy_from_user(vaddr, user_data, args->size); 3753 DRM_LOCK(dev); 3754 if (unwritten) 3755 return -EFAULT; 3756 } 3757 3758 i915_gem_chipset_flush(dev); 3759 return 0; 3760 } 3761 3762 void i915_gem_release(struct drm_device *dev, struct drm_file *file) 3763 { 3764 struct drm_i915_file_private *file_priv = file->driver_priv; 3765 3766 /* Clean up our request list when the client is going away, so that 3767 * later retire_requests won't dereference our soon-to-be-gone 3768 * file_priv. 3769 */ 3770 spin_lock(&file_priv->mm.lock); 3771 while (!list_empty(&file_priv->mm.request_list)) { 3772 struct drm_i915_gem_request *request; 3773 3774 request = list_first_entry(&file_priv->mm.request_list, 3775 struct drm_i915_gem_request, 3776 client_list); 3777 list_del(&request->client_list); 3778 request->file_priv = NULL; 3779 } 3780 spin_unlock(&file_priv->mm.lock); 3781 } 3782 3783 static int 3784 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, 3785 vm_ooffset_t foff, struct ucred *cred, u_short *color) 3786 { 3787 3788 *color = 0; /* XXXKIB */ 3789 return (0); 3790 } 3791 3792 int i915_intr_pf; 3793 3794 static int 3795 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, 3796 vm_page_t *mres) 3797 { 3798 struct drm_gem_object *gem_obj; 3799 struct drm_i915_gem_object *obj; 3800 struct drm_device *dev; 3801 drm_i915_private_t *dev_priv; 3802 vm_page_t m, oldm; 3803 int cause, ret; 3804 bool write; 3805 3806 gem_obj = vm_obj->handle; 3807 obj = to_intel_bo(gem_obj); 3808 dev = obj->base.dev; 3809 dev_priv = dev->dev_private; 3810 #if 0 3811 write = (prot & VM_PROT_WRITE) != 0; 3812 #else 3813 write = true; 3814 #endif 3815 vm_object_pip_add(vm_obj, 1); 3816 3817 /* 3818 * Remove the placeholder page inserted by vm_fault() from the 3819 * object before dropping the object lock. 
If 3820 * i915_gem_release_mmap() is active in parallel on this gem 3821 * object, then it owns the drm device sx and might find the 3822 * placeholder already. Then, since the page is busy, 3823 * i915_gem_release_mmap() sleeps waiting for the busy state 3824 * of the page cleared. We will be not able to acquire drm 3825 * device lock until i915_gem_release_mmap() is able to make a 3826 * progress. 3827 */ 3828 if (*mres != NULL) { 3829 oldm = *mres; 3830 vm_page_remove(oldm); 3831 *mres = NULL; 3832 } else 3833 oldm = NULL; 3834 retry: 3835 VM_OBJECT_UNLOCK(vm_obj); 3836 unlocked_vmobj: 3837 cause = ret = 0; 3838 m = NULL; 3839 3840 if (i915_intr_pf) { 3841 ret = i915_mutex_lock_interruptible(dev); 3842 if (ret != 0) { 3843 cause = 10; 3844 goto out; 3845 } 3846 } else 3847 DRM_LOCK(dev); 3848 3849 /* 3850 * Since the object lock was dropped, other thread might have 3851 * faulted on the same GTT address and instantiated the 3852 * mapping for the page. Recheck. 3853 */ 3854 VM_OBJECT_LOCK(vm_obj); 3855 m = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); 3856 if (m != NULL) { 3857 if ((m->flags & PG_BUSY) != 0) { 3858 DRM_UNLOCK(dev); 3859 #if 0 /* XXX */ 3860 vm_page_sleep(m, "915pee"); 3861 #endif 3862 goto retry; 3863 } 3864 goto have_page; 3865 } else 3866 VM_OBJECT_UNLOCK(vm_obj); 3867 3868 /* Now bind it into the GTT if needed */ 3869 if (!obj->map_and_fenceable) { 3870 ret = i915_gem_object_unbind(obj); 3871 if (ret != 0) { 3872 cause = 20; 3873 goto unlock; 3874 } 3875 } 3876 if (!obj->gtt_space) { 3877 ret = i915_gem_object_bind_to_gtt(obj, 0, true, false); 3878 if (ret != 0) { 3879 cause = 30; 3880 goto unlock; 3881 } 3882 3883 ret = i915_gem_object_set_to_gtt_domain(obj, write); 3884 if (ret != 0) { 3885 cause = 40; 3886 goto unlock; 3887 } 3888 } 3889 3890 if (obj->tiling_mode == I915_TILING_NONE) 3891 ret = i915_gem_object_put_fence(obj); 3892 else 3893 ret = i915_gem_object_get_fence(obj); 3894 if (ret != 0) { 3895 cause = 50; 3896 goto unlock; 3897 } 3898 3899 if (i915_gem_object_is_inactive(obj)) 3900 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 3901 3902 obj->fault_mappable = true; 3903 VM_OBJECT_LOCK(vm_obj); 3904 m = vm_phys_fictitious_to_vm_page(dev->agp->base + obj->gtt_offset + 3905 offset); 3906 if (m == NULL) { 3907 cause = 60; 3908 ret = -EFAULT; 3909 goto unlock; 3910 } 3911 KASSERT((m->flags & PG_FICTITIOUS) != 0, 3912 ("not fictitious %p", m)); 3913 KASSERT(m->wire_count == 1, ("wire_count not 1 %p", m)); 3914 3915 if ((m->flags & PG_BUSY) != 0) { 3916 DRM_UNLOCK(dev); 3917 #if 0 /* XXX */ 3918 vm_page_sleep(m, "915pbs"); 3919 #endif 3920 goto retry; 3921 } 3922 m->valid = VM_PAGE_BITS_ALL; 3923 vm_page_insert(m, vm_obj, OFF_TO_IDX(offset)); 3924 have_page: 3925 *mres = m; 3926 vm_page_busy_try(m, false); 3927 3928 DRM_UNLOCK(dev); 3929 if (oldm != NULL) { 3930 vm_page_free(oldm); 3931 } 3932 vm_object_pip_wakeup(vm_obj); 3933 return (VM_PAGER_OK); 3934 3935 unlock: 3936 DRM_UNLOCK(dev); 3937 out: 3938 KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return")); 3939 if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) { 3940 goto unlocked_vmobj; 3941 } 3942 VM_OBJECT_LOCK(vm_obj); 3943 vm_object_pip_wakeup(vm_obj); 3944 return (VM_PAGER_ERROR); 3945 } 3946 3947 static void 3948 i915_gem_pager_dtor(void *handle) 3949 { 3950 struct drm_gem_object *obj; 3951 struct drm_device *dev; 3952 3953 obj = handle; 3954 dev = obj->dev; 3955 3956 DRM_LOCK(dev); 3957 drm_gem_free_mmap_offset(obj); 3958 i915_gem_release_mmap(to_intel_bo(obj)); 3959 
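	/* The fake mmap offset and any GTT mappings are torn down first; only
	 * then is the object reference dropped, all under the DRM lock. */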
drm_gem_object_unreference(obj); 3960 DRM_UNLOCK(dev); 3961 } 3962 3963 struct cdev_pager_ops i915_gem_pager_ops = { 3964 .cdev_pg_fault = i915_gem_pager_fault, 3965 .cdev_pg_ctor = i915_gem_pager_ctor, 3966 .cdev_pg_dtor = i915_gem_pager_dtor 3967 }; 3968 3969 #define GEM_PARANOID_CHECK_GTT 0 3970 #if GEM_PARANOID_CHECK_GTT 3971 static void 3972 i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma, 3973 int page_count) 3974 { 3975 struct drm_i915_private *dev_priv; 3976 vm_paddr_t pa; 3977 unsigned long start, end; 3978 u_int i; 3979 int j; 3980 3981 dev_priv = dev->dev_private; 3982 start = OFF_TO_IDX(dev_priv->mm.gtt_start); 3983 end = OFF_TO_IDX(dev_priv->mm.gtt_end); 3984 for (i = start; i < end; i++) { 3985 pa = intel_gtt_read_pte_paddr(i); 3986 for (j = 0; j < page_count; j++) { 3987 if (pa == VM_PAGE_TO_PHYS(ma[j])) { 3988 panic("Page %p in GTT pte index %d pte %x", 3989 ma[i], i, intel_gtt_read_pte(i)); 3990 } 3991 } 3992 } 3993 } 3994 #endif 3995 3996 static int 3997 i915_gpu_is_active(struct drm_device *dev) 3998 { 3999 drm_i915_private_t *dev_priv = dev->dev_private; 4000 4001 return !list_empty(&dev_priv->mm.active_list); 4002 } 4003 4004 static void 4005 i915_gem_lowmem(void *arg) 4006 { 4007 struct drm_device *dev; 4008 struct drm_i915_private *dev_priv; 4009 struct drm_i915_gem_object *obj, *next; 4010 int cnt, cnt_fail, cnt_total; 4011 4012 dev = arg; 4013 dev_priv = dev->dev_private; 4014 4015 if (lockmgr(&dev->dev_struct_lock, LK_EXCLUSIVE|LK_NOWAIT)) 4016 return; 4017 4018 rescan: 4019 /* first scan for clean buffers */ 4020 i915_gem_retire_requests(dev); 4021 4022 cnt_total = cnt_fail = cnt = 0; 4023 4024 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 4025 mm_list) { 4026 if (i915_gem_object_is_purgeable(obj)) { 4027 if (i915_gem_object_unbind(obj) != 0) 4028 cnt_total++; 4029 } else 4030 cnt_total++; 4031 } 4032 4033 /* second pass, evict/count anything still on the inactive list */ 4034 list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, 4035 mm_list) { 4036 if (i915_gem_object_unbind(obj) == 0) 4037 cnt++; 4038 else 4039 cnt_fail++; 4040 } 4041 4042 if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) { 4043 /* 4044 * We are desperate for pages, so as a last resort, wait 4045 * for the GPU to finish and discard whatever we can. 4046 * This has a dramatic impact to reduce the number of 4047 * OOM-killer events whilst running the GPU aggressively. 4048 */ 4049 if (i915_gpu_idle(dev) == 0) 4050 goto rescan; 4051 } 4052 DRM_UNLOCK(dev); 4053 } 4054 4055 void 4056 i915_gem_unload(struct drm_device *dev) 4057 { 4058 struct drm_i915_private *dev_priv; 4059 4060 dev_priv = dev->dev_private; 4061 EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem); 4062 } 4063
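
/*
 * (Aside, added for illustration: the fallback test at the end of
 * i915_gem_lowmem() above, shown in isolation.  Not driver code.)
 */
#if 0	/* illustrative sketch only, never compiled */
static bool sketch_should_wait_for_gpu(int cnt_fail, int cnt_total,
				       bool gpu_active)
{
	/* Only wait for the GPU when more than 1% of the inactive objects
	 * could not be unbound and the GPU still has work outstanding. */
	return cnt_fail > cnt_total / 100 && gpu_active;
}
#endif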