/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drmP.h>
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
#include "intel_mocs.h"
#include "intel_workarounds.h"
#include "i915_gemfs.h"
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>

static void i915_gem_flush_free_objects(struct drm_i915_private *i915);

static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	if (obj->cache_dirty)
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	return obj->pin_global; /* currently in use by HW, keep flushed */
}

static int
insert_mappable_node(struct i915_ggtt *ggtt,
		     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     u64 size)
{
	spin_lock(&dev_priv->mm.object_stat_lock);
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
	spin_unlock(&dev_priv->mm.object_stat_lock);
}

static int
i915_gem_wait_for_error(struct i915_gpu_error *error)
{
	int ret;

	might_sleep();

	/*
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
	 * userspace. If it takes that long something really bad is going on and
	 * we should simply try to bail out and fail as gracefully as possible.
	 */
	ret = wait_event_interruptible_timeout(error->reset_queue,
					       !i915_reset_backoff(error),
					       I915_RESET_TIMEOUT);
	if (ret == 0) {
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
		return -EIO;
	} else if (ret < 0) {
		return ret;
	} else {
		return 0;
	}
}

int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	int ret;

	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

	return 0;
}

static u32 __i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);
	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));

	if (!i915->gt.awake)
		return I915_EPOCH_INVALID;

	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);

	/*
	 * Be paranoid and flush a concurrent interrupt to make sure
	 * we don't reactivate any irq tasklets after parking.
	 *
	 * FIXME: Note that even though we have waited for execlists to be idle,
	 * there may still be an in-flight interrupt even though the CSB
	 * is now empty. synchronize_irq() makes sure that a residual interrupt
	 * is completed before we continue, but it doesn't prevent the HW from
	 * raising a spurious interrupt later. To complete the shield we should
	 * coordinate disabling the CS irq with flushing the interrupts.
	 */
	synchronize_irq(i915->drm.irq);

	intel_engines_park(i915);
	i915_timelines_park(i915);

	i915_pmu_gt_parked(i915);
	i915_vma_parked(i915);

	i915->gt.awake = false;

	if (INTEL_GEN(i915) >= 6)
		gen6_rps_idle(i915);

	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);

	intel_runtime_pm_put(i915);

	return i915->gt.epoch;
}

void i915_gem_park(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(i915->gt.active_requests);

	if (!i915->gt.awake)
		return;

	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
}

void i915_gem_unpark(struct drm_i915_private *i915)
{
	GEM_TRACE("\n");

	lockdep_assert_held(&i915->drm.struct_mutex);
	GEM_BUG_ON(!i915->gt.active_requests);

	if (i915->gt.awake)
		return;

	intel_runtime_pm_get_noresume(i915);

	/*
	 * It seems that the DMC likes to transition between the DC states a lot
	 * when there are no connected displays (no active power domains) during
	 * command submission.
	 *
	 * This activity has negative impact on the performance of the chip with
	 * huge latencies observed in the interrupt handler and elsewhere.
	 *
	 * Work around it by grabbing a GT IRQ power domain whilst there is any
	 * GT activity, preventing any DC state transitions.
	 */
	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);

	i915->gt.awake = true;
	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
		i915->gt.epoch = 1;

	intel_enable_gt_powersave(i915);
	i915_update_gfx_val(i915);
	if (INTEL_GEN(i915) >= 6)
		gen6_rps_busy(i915);
	i915_pmu_gt_unparked(i915);

	intel_engines_unpark(i915);

	i915_queue_hangcheck(i915);

	queue_delayed_work(i915->wq,
			   &i915->gt.retire_work,
			   round_jiffies_up_relative(HZ));
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	pinned = ggtt->vm.reserved;
	mutex_lock(&dev->struct_mutex);
	list_for_each_entry(vma, &ggtt->vm.active_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;
	mutex_unlock(&dev->struct_mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
#ifdef __linux__
	struct address_space *mapping = obj->base.filp->f_mapping;
#endif
	drm_dma_handle_t *phys;
	struct sg_table *st;
	struct scatterlist *sg;
	char *vaddr;
	int i;
	int err;

	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
		return -EINVAL;

	/* Always aligning to the object size allows a single allocation
	 * to handle all possible callers, and given typical object sizes,
	 * the alignment of the buddy allocation will naturally match.
	 */
	phys = drm_pci_alloc(obj->base.dev,
			     roundup_pow_of_two(obj->base.size),
			     roundup_pow_of_two(obj->base.size));
	if (!phys)
		return -ENOMEM;

	vaddr = phys->vaddr;
	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
		struct vm_page *page;
		char *src;

#ifdef __linux__
		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto err_phys;
		}
#else
		struct pglist plist;
		TAILQ_INIT(&plist);
		if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist)) {
			err = -ENOMEM;
			goto err_phys;
		}
		page = TAILQ_FIRST(&plist);
#endif

		src = kmap_atomic(page);
		memcpy(vaddr, src, PAGE_SIZE);
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		kunmap_atomic(src);

#ifdef __linux__
		put_page(page);
#else
		uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE);
#endif
		vaddr += PAGE_SIZE;
	}

	i915_gem_chipset_flush(to_i915(obj->base.dev));

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		err = -ENOMEM;
		goto err_phys;
	}

	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
		kfree(st);
		err = -ENOMEM;
		goto err_phys;
	}

	sg = st->sgl;
	sg->offset = 0;
	sg->length = obj->base.size;

	sg_dma_address(sg) = phys->busaddr;
	sg_dma_len(sg) = obj->base.size;

	obj->phys_handle = phys;

	__i915_gem_object_set_pages(obj, st, sg->length);

	return 0;

err_phys:
	drm_pci_free(obj->base.dev, phys);

	return err;
}

static void __start_cpu_write(struct drm_i915_gem_object *obj)
{
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	if (cpu_write_needs_clflush(obj))
		obj->cache_dirty = true;
}

static void
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
				struct sg_table *pages,
				bool needs_clflush)
{
	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);

	if (obj->mm.madv == I915_MADV_DONTNEED)
		obj->mm.dirty = false;

	if (needs_clflush &&
	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
		drm_clflush_sg(pages);

	__start_cpu_write(obj);
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	__i915_gem_object_release_shmem(obj, pages, false);

	if (obj->mm.dirty) {
#ifdef __linux__
		struct address_space *mapping = obj->base.filp->f_mapping;
#endif
		char *vaddr = obj->phys_handle->vaddr;
		int i;

		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
			struct vm_page *page;
			char *dst;

#ifdef __linux__
			page = shmem_read_mapping_page(mapping, i);
			if (IS_ERR(page))
				continue;
#else
			struct pglist plist;
			TAILQ_INIT(&plist);
			if (uvm_objwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE, &plist))
				continue;
			page = TAILQ_FIRST(&plist);
#endif

			dst = kmap_atomic(page);
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			memcpy(dst, vaddr, PAGE_SIZE);
			kunmap_atomic(dst);

			set_page_dirty(page);
#ifdef __linux__
			if (obj->mm.madv == I915_MADV_WILLNEED)
				mark_page_accessed(page);
			put_page(page);
#else
			uvm_objunwire(obj->base.uao, i * PAGE_SIZE, (i + 1) * PAGE_SIZE);
#endif
			vaddr += PAGE_SIZE;
		}
		obj->mm.dirty = false;
	}

	sg_free_table(pages);
	kfree(pages);

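	/* The dirty contents have been copied back to the shmem pages above;
	 * now release the contiguous physical allocation itself.
	 */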
	drm_pci_free(obj->base.dev, obj->phys_handle);
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
	.get_pages = i915_gem_object_get_pages_phys,
	.put_pages = i915_gem_object_put_pages_phys,
	.release = i915_gem_object_release_phys,
};

static const struct drm_i915_gem_object_ops i915_gem_object_ops;

int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma;
	DRM_LIST_HEAD(still_in_list);
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
	 */
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	while ((vma = list_first_entry_or_null(&obj->vma_list,
					       struct i915_vma,
					       obj_link))) {
		list_move_tail(&vma->obj_link, &still_in_list);
		ret = i915_vma_unbind(vma);
		if (ret)
			break;
	}
	list_splice(&still_in_list, &obj->vma_list);

	return ret;
}

static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
			   long timeout,
			   struct intel_rps_client *rps_client)
{
	struct i915_request *rq;

	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
	if (i915_request_completed(rq))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we wait. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery). Not all clients even want their results
	 * immediately and for them we should just let the GPU select its own
	 * frequency to maximise efficiency. To prevent a single client from
	 * forcing the clocks too high for the whole system, we only allow
	 * each client to waitboost once in a busy period.
	 */
	if (rps_client && !i915_request_started(rq)) {
		if (INTEL_GEN(rq->i915) >= 6)
			gen6_rps_boost(rq, rps_client);
	}

	timeout = i915_request_wait(rq, flags, timeout);

out:
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
				 long timeout,
				 struct intel_rps_client *rps_client)
{
	unsigned int seq = __read_seqcount_begin(&resv->seq);
	struct dma_fence *excl;
	bool prune_fences = false;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
							     flags, timeout,
							     rps_client);
			if (timeout < 0)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);

		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must also be signaled. If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
		prune_fences = count && timeout >= 0;
	} else {
		excl = reservation_object_get_excl_rcu(resv);
	}

	if (excl && timeout >= 0)
		timeout = i915_gem_object_wait_fence(excl, flags, timeout,
						     rps_client);

	dma_fence_put(excl);

	/*
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
	}

	return timeout;
}

static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
{
	struct i915_request *rq;
	struct intel_engine_cs *engine;

	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
		return;

	rq = to_request(fence);
	engine = rq->engine;

	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
	if (engine->schedule)
		engine->schedule(rq, attr);
	rcu_read_unlock();
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
}

static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
			__fence_set_priority(array->fences[i], attr);
	} else {
		__fence_set_priority(fence, attr);
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
			      const struct i915_sched_attr *attr)
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
			fence_set_priority(shared[i], attr);
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
		fence_set_priority(excl, attr);
		dma_fence_put(excl);
	}
	return 0;
}

/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
 * @rps_client: client (user process) to charge for any waitboosting
 */
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
		     long timeout,
		     struct intel_rps_client *rps_client)
{
	might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
	GEM_BUG_ON(debug_locks &&
		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
		   !!(flags & I915_WAIT_LOCKED));
#endif
	GEM_BUG_ON(timeout < 0);

	timeout = i915_gem_object_wait_reservation(obj->resv,
						   flags, timeout,
						   rps_client);
	return timeout < 0 ? timeout : 0;
}

static struct intel_rps_client *to_rps_client(struct drm_file *file)
{
	struct drm_i915_file_private *fpriv = file->driver_priv;

	return &fpriv->rps_client;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
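	/* Notify frontbuffer tracking before the CPU write in case the
	 * object is a scanout buffer; the matching flush follows the copy.
	 */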
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	i915_gem_chipset_flush(to_i915(obj->base.dev));

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	return 0;
}

void *i915_gem_object_alloc(struct drm_i915_private *dev_priv)
{
#ifdef __linux__
	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
#else
	return pool_get(&dev_priv->objects, PR_WAITOK | PR_ZERO);
#endif
}

void i915_gem_object_free(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
#ifdef __linux__
	kmem_cache_free(dev_priv->objects, obj);
#else
	pool_put(&dev_priv->objects, obj);
#endif
}

static int
i915_gem_create(struct drm_file *file,
		struct drm_i915_private *dev_priv,
		uint64_t size,
		uint32_t *handle_p)
{
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	size = roundup(size, PAGE_SIZE);
	if (size == 0)
		return -EINVAL;

	/* Allocate the new object */
	obj = i915_gem_object_create(dev_priv, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
	args->size = args->pitch * args->height;
	return i915_gem_create(file, to_i915(dev),
			       args->size, &args->handle);
}

static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(dev_priv);

	return i915_gem_create(file, dev_priv,
			       args->size, &args->handle);
}

static inline enum fb_op_origin
fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
{
	return (domain == I915_GEM_DOMAIN_GTT ?
		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}

void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	i915_gem_chipset_flush(dev_priv);

	intel_runtime_pm_get(dev_priv);
	spin_lock_irq(&dev_priv->uncore.lock);

	POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));

	spin_unlock_irq(&dev_priv->uncore.lock);
	intel_runtime_pm_put(dev_priv);
}

static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		i915_gem_flush_ggtt_writes(dev_priv);

		intel_fb_obj_flush(obj,
				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));

		for_each_ggtt_vma(vma, obj) {
			if (vma->iomap)
				continue;

			i915_vma_unset_ggtt_write(vma);
		}
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
			  const char __user *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = roundup2(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
				    unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
				     unsigned int *needs_clflush)
{
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_LOCKED |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   NULL);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
			     bool swizzled)
{
	if (unlikely(swizzled)) {
		unsigned long start = (unsigned long) addr;
		unsigned long end = (unsigned long) addr + length;

		/* For swizzling simply ensure that we always flush both
		 * channels. Lame, but simple and it works. Swizzled
		 * pwrite/pread is far from a hotpath - current userspace
		 * doesn't use it at all. */
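		/* Rounding to 128 bytes covers both the cacheline and its
		 * 64-byte swizzled partner.
		 */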
		start = round_down(start, 128);
		end = round_up(end, 128);

		drm_clflush_virt_range((void *)start, end - start);
	} else {
		drm_clflush_virt_range(addr, length);
	}

}

/* Only difference to the fast-path function is that this can handle bit17
 * and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct vm_page *page, int offset, int length,
		 char __user *user_data,
		 bool page_do_bit17_swizzling, bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (needs_clflush)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);

	if (page_do_bit17_swizzling)
		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
	else
		ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}

static int
shmem_pread(struct vm_page *page, int offset, int length, char __user *user_data,
	    bool page_do_bit17_swizzling, bool needs_clflush)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, length);
		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return 0;

	return shmem_pread_slow(page, offset, length, user_data,
				page_do_bit17_swizzling, needs_clflush);
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct vm_page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  page_to_phys(page) & obj_do_bit17_swizzling,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

#ifdef __linux__
static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
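	/* Try the atomic (non-sleeping) WC mapping first; if the copy
	 * faults, fall back below to a full mapping that may sleep.
	 */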
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
#else
static inline bool
gtt_user_read(struct drm_i915_private *dev_priv,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}
	return unwritten;
}
#endif

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	intel_runtime_pm_get(i915);
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
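		/* Without a pinned mappable vma for the whole object, rebind
		 * our temporary GGTT node to the next page of the object
		 * before reading it through the aperture.
		 */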
		if (node.allocated) {
			wmb();
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb();
		} else {
			page_base += offset & ~PAGE_MASK;
		}

		if (gtt_user_read(i915, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_unlock:
	intel_runtime_pm_put(i915);
	mutex_unlock(&i915->drm.struct_mutex);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */
#ifdef __linux__
static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}
#else
static inline bool
ggtt_write(struct drm_i915_private *dev_priv,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
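	/* Map the aperture window via bus_space and attempt the non-faulting
	 * copy first; on a fault, retry below with a plain mapping.
	 */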
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}

	return unwritten;
}
#endif

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct drm_mm_node node;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		if (!intel_runtime_pm_get_if_in_use(i915)) {
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		intel_runtime_pm_get(i915);
	}

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
		ret = i915_vma_put_fence(vma);
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
	if (IS_ERR(vma)) {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!node.allocated);
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	mutex_unlock(&i915->drm.struct_mutex);

	intel_fb_obj_invalidate(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & ~PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
		 */
		if (ggtt_write(i915, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	intel_fb_obj_flush(obj, ORIGIN_CPU);

	mutex_lock(&i915->drm.struct_mutex);
out_unpin:
	if (node.allocated) {
		wmb();
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(&node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(i915);
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return ret;
}

static int
shmem_pwrite_slow(struct vm_page *page, int offset, int length,
		  char __user *user_data,
		  bool page_do_bit17_swizzling,
		  bool needs_clflush_before,
		  bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	if (page_do_bit17_swizzling)
		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
						length);
	else
		ret = __copy_from_user(vaddr + offset, user_data, length);
	if (needs_clflush_after)
		shmem_clflush_swizzled_range(vaddr + offset, length,
					     page_do_bit17_swizzling);
	kunmap(vaddr);

	return ret ? -EFAULT : 0;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct vm_page *page, int offset, int len, char __user *user_data,
	     bool page_do_bit17_swizzling,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	int ret;

	ret = -ENODEV;
	if (!page_do_bit17_swizzling) {
		char *vaddr = kmap_atomic(page);

		if (needs_clflush_before)
			drm_clflush_virt_range(vaddr + offset, len);
		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + offset, len);

		kunmap_atomic(vaddr);
	}
	if (ret == 0)
		return ret;

	return shmem_pwrite_slow(page, offset, len, user_data,
				 page_do_bit17_swizzling,
				 needs_clflush_before,
				 needs_clflush_after);
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int obj_do_bit17_swizzling;
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	int ret;

	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
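	/* ci_cflushsz is the CPU's cacheline flush size; masking
	 * (offset | length) against it below detects writes that only
	 * partially cover a cacheline.
	 */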
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = curcpu()->ci_cflushsz - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct vm_page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   page_to_phys(page) & obj_do_bit17_swizzling,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	intel_fb_obj_flush(obj, ORIGIN_CPU);
	i915_gem_obj_finish_shmem_access(obj);
	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
		else
			ret = i915_gem_shmem_pwrite(obj, args);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915;
	struct list_head *list;
	struct i915_vma *vma;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	for_each_ggtt_vma(vma, obj) {
		if (i915_vma_is_active(vma))
			continue;

		if (!drm_mm_node_allocated(&vma->node))
			continue;

		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
	}

	i915 = to_i915(obj->base.dev);
	spin_lock(&i915->mm.obj_lock);
	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
	list_move_tail(&obj->mm.link, list);
	spin_unlock(&i915->mm.obj_lock);
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT,
				   to_rps_client(file));
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	mutex_unlock(&dev->struct_mutex);

	if (write_domain != 0)
		intel_fb_obj_invalidate(obj,
					fb_write_origin(obj, write_domain));

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

#ifdef __linux__
static inline bool
__vma_matches(struct vm_area_struct *vma, struct file *filp,
	      unsigned long addr, unsigned long size)
{
	if (vma->vm_file != filp)
		return false;

	return vma->vm_start == addr &&
	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
}
#endif

/**
 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
 *			 it is mapped to.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 *
 * IMPORTANT:
 *
 * DRM driver writers who look at this function as an example for how to do GEM
 * mmap support, please don't implement mmap support like here. The modern way
 * to implement DRM mmap support is with an mmap offset ioctl (like
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
 * That way debug tooling like valgrind will understand what's going on, hiding
 * the mmap call in a driver private ioctl will break that. The i915 driver only
 * does cpu mmaps this way because we didn't know better.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_i915_gem_object *obj;
	vaddr_t addr;
	vsize_t size;
	int ret;

#ifdef __OpenBSD__
	if (args->size == 0 || args->offset & PAGE_MASK)
		return -EINVAL;
	size = round_page(args->size);
	if (args->offset + size < args->offset)
		return -EINVAL;
#endif

	if (args->flags & ~(I915_MMAP_WC))
		return -EINVAL;

	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
		return -ENODEV;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* prime objects have no backing filp to GEM mmap
	 * pages from.
	 */
	if (!obj->base.filp) {
		i915_gem_object_put(obj);
		return -ENXIO;
	}

#ifdef __linux__
	addr = vm_mmap(obj->base.filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	if (args->flags & I915_MMAP_WC) {
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;

		if (down_write_killable(&mm->mmap_sem)) {
			i915_gem_object_put(obj);
			return -EINTR;
		}
		vma = find_vma(mm, addr);
		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
			vma->vm_page_prot =
				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
		else
			addr = -ENOMEM;
		up_write(&mm->mmap_sem);

		/* This may race, but that's ok, it only gets set */
		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
	}
	i915_gem_object_put(obj);
	if (IS_ERR((void *)addr))
		return addr;
#else
	addr = 0;
	ret = -uvm_map(&curproc->p_vmspace->vm_map, &addr, size,
	    obj->base.uao, args->offset, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE, MAP_INHERIT_SHARE, MADV_RANDOM,
	    (args->flags & I915_MMAP_WC) ? UVM_FLAG_WC : 0));
	if (ret == 0)
		uao_reference(obj->base.uao);
	i915_gem_object_put(obj);
	if (ret)
		return ret;
#endif

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
{
	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
}

/**
 * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
 *
 * A history of the GTT mmap interface:
 *
 * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
 *     be aligned and suitable for fencing, and still fit into the available
 *     mappable space left by the pinned display objects. A classic problem
 *     we called the page-fault-of-doom where we would ping-pong between
 *     two objects that could not fit inside the GTT and so the memcpy
 *     would page one object in at the expense of the other between every
 *     single byte.
 *
 * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
 *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
 *     object is too large for the available space (or simply too large
 *     for the mappable aperture!), a view is created instead and faulted
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
 * 2 - Recognise WC as a separate cache domain so that we can flush the
 *     delayed writes via GTT before performing direct access via WC.
2058 * 2059 * Restrictions: 2060 * 2061 * * snoopable objects cannot be accessed via the GTT. Doing so can cause machine 2062 * hangs on some architectures, corruption on others. An attempt to service 2063 * a GTT page fault from a snoopable object will generate a SIGBUS. 2064 * 2065 * * the object must be able to fit into RAM (physical memory, though not 2066 * limited to the mappable aperture). 2067 * 2068 * 2069 * Caveats: 2070 * 2071 * * a new GTT page fault will synchronize rendering from the GPU and flush 2072 * all data to system memory. Subsequent access will not be synchronized. 2073 * 2074 * * all mappings are revoked on runtime device suspend. 2075 * 2076 * * there are only 8, 16 or 32 fence registers to share between all users 2077 * (older machines require a fence register for display and blitter access 2078 * as well). Contention of the fence registers will cause the previous users 2079 * to be unmapped and any new access will generate new page faults. 2080 * 2081 * * running out of memory while servicing a fault may generate a SIGBUS, 2082 * rather than the expected SIGSEGV. 2083 */ 2084 int i915_gem_mmap_gtt_version(void) 2085 { 2086 return 2; 2087 } 2088 2089 static inline struct i915_ggtt_view 2090 compute_partial_view(struct drm_i915_gem_object *obj, 2091 pgoff_t page_offset, 2092 unsigned int chunk) 2093 { 2094 struct i915_ggtt_view view; 2095 2096 if (i915_gem_object_is_tiled(obj)) 2097 chunk = roundup(chunk, tile_row_pages(obj)); 2098 2099 view.type = I915_GGTT_VIEW_PARTIAL; 2100 view.partial.offset = rounddown(page_offset, chunk); 2101 view.partial.size = 2102 min_t(unsigned int, chunk, 2103 (obj->base.size >> PAGE_SHIFT) - view.partial.offset); 2104 2105 /* If the partial covers the entire object, just create a normal VMA. */ 2106 if (chunk >= obj->base.size >> PAGE_SHIFT) 2107 view.type = I915_GGTT_VIEW_NORMAL; 2108 2109 return view; 2110 } 2111 2112 #ifdef __linux__ 2113 2114 /** 2115 * i915_gem_fault - fault a page into the GTT 2116 * @vmf: fault info 2117 * 2118 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped 2119 * from userspace. The fault handler takes care of binding the object to 2120 * the GTT (if needed), allocating and programming a fence register (again, 2121 * only if needed based on whether the old reg is still valid or the object 2122 * is tiled) and inserting a new PTE into the faulting process. 2123 * 2124 * Note that the faulting process may involve evicting existing objects 2125 * from the GTT and/or fence registers to make room. So performance may 2126 * suffer if the GTT working set is large or there are few fence registers 2127 * left. 2128 * 2129 * The current feature set supported by i915_gem_fault() and thus GTT mmaps 2130 * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
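 *
 * As a hedged illustration only (the names "fd" and "version" are assumed
 * and error handling is omitted), userspace can probe that version through
 * the getparam ioctl:
 *
 *	int version = 0;
 *	struct drm_i915_getparam gp = {
 *		.param = I915_PARAM_MMAP_GTT_VERSION,
 *		.value = &version,
 *	};
 *	ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
 *	// version >= 1 implies partial views for oversized objects, see above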
2131 */ 2132 vm_fault_t i915_gem_fault(struct vm_fault *vmf) 2133 { 2134 #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) 2135 struct vm_area_struct *area = vmf->vma; 2136 struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); 2137 struct drm_device *dev = obj->base.dev; 2138 struct drm_i915_private *dev_priv = to_i915(dev); 2139 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2140 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 2141 struct i915_vma *vma; 2142 pgoff_t page_offset; 2143 int ret; 2144 2145 /* Sanity check that we allow writing into this object */ 2146 if (i915_gem_object_is_readonly(obj) && write) 2147 return VM_FAULT_SIGBUS; 2148 2149 /* We don't use vmf->pgoff since that has the fake offset */ 2150 page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; 2151 2152 trace_i915_gem_object_fault(obj, page_offset, true, write); 2153 2154 /* Try to flush the object off the GPU first without holding the lock. 2155 * Upon acquiring the lock, we will perform our sanity checks and then 2156 * repeat the flush holding the lock in the normal manner to catch cases 2157 * where we are gazumped. 2158 */ 2159 ret = i915_gem_object_wait(obj, 2160 I915_WAIT_INTERRUPTIBLE, 2161 MAX_SCHEDULE_TIMEOUT, 2162 NULL); 2163 if (ret) 2164 goto err; 2165 2166 ret = i915_gem_object_pin_pages(obj); 2167 if (ret) 2168 goto err; 2169 2170 intel_runtime_pm_get(dev_priv); 2171 2172 ret = i915_mutex_lock_interruptible(dev); 2173 if (ret) 2174 goto err_rpm; 2175 2176 /* Access to snoopable pages through the GTT is incoherent. */ 2177 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2178 ret = -EFAULT; 2179 goto err_unlock; 2180 } 2181 2182 2183 /* Now pin it into the GTT as needed */ 2184 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2185 PIN_MAPPABLE | 2186 PIN_NONBLOCK | 2187 PIN_NONFAULT); 2188 if (IS_ERR(vma)) { 2189 /* Use a partial view if it is bigger than available space */ 2190 struct i915_ggtt_view view = 2191 compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); 2192 unsigned int flags; 2193 2194 flags = PIN_MAPPABLE; 2195 if (view.type == I915_GGTT_VIEW_NORMAL) 2196 flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ 2197 2198 /* 2199 * Userspace is now writing through an untracked VMA, abandon 2200 * all hope that the hardware is able to track future writes. 
2201 */ 2202 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2203 2204 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2205 if (IS_ERR(vma) && !view.type) { 2206 flags = PIN_MAPPABLE; 2207 view.type = I915_GGTT_VIEW_PARTIAL; 2208 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); 2209 } 2210 } 2211 if (IS_ERR(vma)) { 2212 ret = PTR_ERR(vma); 2213 goto err_unlock; 2214 } 2215 2216 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2217 if (ret) 2218 goto err_unpin; 2219 2220 ret = i915_vma_pin_fence(vma); 2221 if (ret) 2222 goto err_unpin; 2223 2224 /* Finally, remap it using the new GTT offset */ 2225 ret = remap_io_mapping(area, 2226 area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), 2227 (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, 2228 min_t(u64, vma->size, area->vm_end - area->vm_start), 2229 &ggtt->iomap); 2230 if (ret) 2231 goto err_fence; 2232 2233 /* Mark as being mmapped into userspace for later revocation */ 2234 assert_rpm_wakelock_held(dev_priv); 2235 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2236 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2237 GEM_BUG_ON(!obj->userfault_count); 2238 2239 i915_vma_set_ggtt_write(vma); 2240 2241 err_fence: 2242 i915_vma_unpin_fence(vma); 2243 err_unpin: 2244 __i915_vma_unpin(vma); 2245 err_unlock: 2246 mutex_unlock(&dev->struct_mutex); 2247 err_rpm: 2248 intel_runtime_pm_put(dev_priv); 2249 i915_gem_object_unpin_pages(obj); 2250 err: 2251 switch (ret) { 2252 case -EIO: 2253 /* 2254 * We eat errors when the gpu is terminally wedged to avoid 2255 * userspace unduly crashing (gl has no provisions for mmaps to 2256 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2257 * and so needs to be reported. 2258 */ 2259 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2260 return VM_FAULT_SIGBUS; 2261 /* else: fall through */ 2262 case -EAGAIN: 2263 /* 2264 * EAGAIN means the gpu is hung and we'll wait for the error 2265 * handler to reset everything when re-faulting in 2266 * i915_mutex_lock_interruptible. 2267 */ 2268 case 0: 2269 case -ERESTARTSYS: 2270 case -EINTR: 2271 case -EBUSY: 2272 /* 2273 * EBUSY is ok: this just means that another thread 2274 * already did the job. 2275 */ 2276 return VM_FAULT_NOPAGE; 2277 case -ENOMEM: 2278 return VM_FAULT_OOM; 2279 case -ENOSPC: 2280 case -EFAULT: 2281 return VM_FAULT_SIGBUS; 2282 default: 2283 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2284 return VM_FAULT_SIGBUS; 2285 } 2286 } 2287 2288 #else 2289 2290 int 2291 i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi, 2292 off_t offset, vaddr_t vaddr, vm_page_t *pps, int npages, int centeridx, 2293 vm_prot_t access_type, int flags) 2294 { 2295 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 2296 struct drm_device *dev = obj->base.dev; 2297 struct drm_i915_private *dev_priv = dev->dev_private; 2298 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2299 paddr_t paddr; 2300 int lcv, ret = 0; 2301 int write = !!(access_type & PROT_WRITE); 2302 struct i915_vma *vma; 2303 vm_prot_t mapprot; 2304 boolean_t locked = TRUE; 2305 2306 /* Sanity check that we allow writing into this object */ 2307 if (i915_gem_object_is_readonly(obj) && write) { 2308 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 2309 &obj->base.uobj, NULL); 2310 return VM_PAGER_BAD; 2311 } 2312 2313 /* 2314 * If we already own the lock, we must be doing a copyin or 2315 * copyout in one of the fast paths. Return failure such that 2316 * we fall back on the slow path. 
2317 */ 2318 if (!drm_vma_node_has_offset(&obj->base.vma_node) || 2319 RWLOCK_OWNER(&dev->struct_mutex) == curproc) { 2320 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 2321 &obj->base.uobj, NULL); 2322 return VM_PAGER_BAD; 2323 } 2324 2325 offset -= drm_vma_node_offset_addr(&obj->base.vma_node); 2326 2327 if (!mutex_trylock(&dev->struct_mutex)) { 2328 uvmfault_unlockall(ufi, NULL, &obj->base.uobj, NULL); 2329 mutex_lock(&dev->struct_mutex); 2330 locked = uvmfault_relock(ufi); 2331 } 2332 if (!locked) { 2333 mutex_unlock(&dev->struct_mutex); 2334 return VM_PAGER_REFAULT; 2335 } 2336 2337 /* Try to flush the object off the GPU first without holding the lock. 2338 * Upon acquiring the lock, we will perform our sanity checks and then 2339 * repeat the flush holding the lock in the normal manner to catch cases 2340 * where we are gazumped. 2341 */ 2342 ret = i915_gem_object_wait(obj, 2343 I915_WAIT_INTERRUPTIBLE, 2344 MAX_SCHEDULE_TIMEOUT, 2345 NULL); 2346 if (ret) 2347 goto err; 2348 2349 ret = i915_gem_object_pin_pages(obj); 2350 if (ret) 2351 goto err; 2352 2353 intel_runtime_pm_get(dev_priv); 2354 2355 /* Access to snoopable pages through the GTT is incoherent. */ 2356 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { 2357 ret = -EINVAL; 2358 goto err_unlock; 2359 } 2360 2361 /* Now pin it into the GTT as needed */ 2362 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 2363 PIN_MAPPABLE | 2364 PIN_NONBLOCK | 2365 PIN_NONFAULT); 2366 if (IS_ERR(vma)) { 2367 /* 2368 * Userspace is now writing through an untracked VMA, abandon 2369 * all hope that the hardware is able to track future writes. 2370 */ 2371 obj->frontbuffer_ggtt_origin = ORIGIN_CPU; 2372 2373 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); 2374 } 2375 if (IS_ERR(vma)) { 2376 ret = PTR_ERR(vma); 2377 goto err_unlock; 2378 } 2379 2380 ret = i915_gem_object_set_to_gtt_domain(obj, write); 2381 if (ret) 2382 goto err_unpin; 2383 2384 ret = i915_vma_pin_fence(vma); 2385 if (ret) 2386 goto err_unpin; 2387 2388 mapprot = ufi->entry->protection; 2389 /* 2390 * if it's only a read fault, we only put ourselves into the gtt 2391 * read domain, so make sure we fault again and set ourselves to write. 2392 * this prevents us needing userland to do domain management and get 2393 * it wrong, and makes us fully coherent with the gpu re mmap. 
2394 */ 2395 if (write == 0) 2396 mapprot &= ~PROT_WRITE; 2397 /* XXX try and be more efficient when we do this */ 2398 for (lcv = 0 ; lcv < npages ; lcv++, offset += PAGE_SIZE, 2399 vaddr += PAGE_SIZE) { 2400 if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) 2401 continue; 2402 2403 if (pps[lcv] == PGO_DONTCARE) 2404 continue; 2405 2406 paddr = ggtt->gmadr.start + vma->node.start + offset; 2407 2408 if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr, 2409 mapprot, PMAP_CANFAIL | mapprot) != 0) { 2410 i915_vma_unpin_fence(vma); 2411 __i915_vma_unpin(vma); 2412 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, 2413 NULL, NULL); 2414 mutex_unlock(&dev->struct_mutex); 2415 pmap_update(ufi->orig_map->pmap); 2416 uvm_wait("intelflt"); 2417 ret = VM_PAGER_REFAULT; 2418 intel_runtime_pm_put(dev_priv); 2419 i915_gem_object_unpin_pages(obj); 2420 return ret; 2421 } 2422 } 2423 2424 /* Mark as being mmapped into userspace for later revocation */ 2425 assert_rpm_wakelock_held(dev_priv); 2426 if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) 2427 list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); 2428 GEM_BUG_ON(!obj->userfault_count); 2429 2430 i915_vma_set_ggtt_write(vma); 2431 2432 #ifdef notyet 2433 err_fence: 2434 #endif 2435 i915_vma_unpin_fence(vma); 2436 err_unpin: 2437 __i915_vma_unpin(vma); 2438 err_unlock: 2439 uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL, NULL); 2440 mutex_unlock(&dev->struct_mutex); 2441 pmap_update(ufi->orig_map->pmap); 2442 #ifdef notyet 2443 err_rpm: 2444 #endif 2445 intel_runtime_pm_put(dev_priv); 2446 i915_gem_object_unpin_pages(obj); 2447 err: 2448 switch (ret) { 2449 case -EIO: 2450 /* 2451 * We eat errors when the gpu is terminally wedged to avoid 2452 * userspace unduly crashing (gl has no provisions for mmaps to 2453 * fail). But any other -EIO isn't ours (e.g. swap in failure) 2454 * and so needs to be reported. 2455 */ 2456 if (!i915_terminally_wedged(&dev_priv->gpu_error)) 2457 return VM_PAGER_ERROR; 2458 /* else: fall through */ 2459 case -EAGAIN: 2460 /* 2461 * EAGAIN means the gpu is hung and we'll wait for the error 2462 * handler to reset everything when re-faulting in 2463 * i915_mutex_lock_interruptible. 2464 */ 2465 case 0: 2466 case -ERESTART: 2467 case -EINTR: 2468 case -EBUSY: 2469 /* 2470 * EBUSY is ok: this just means that another thread 2471 * already did the job. 
2472 */ 2473 return VM_PAGER_OK; 2474 case -ENOMEM: 2475 return VM_PAGER_ERROR; 2476 case -ENOSPC: 2477 case -EFAULT: 2478 return VM_PAGER_ERROR; 2479 default: 2480 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret); 2481 return VM_PAGER_ERROR; 2482 } 2483 } 2484 2485 #endif 2486 2487 static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) 2488 { 2489 struct i915_vma *vma; 2490 2491 GEM_BUG_ON(!obj->userfault_count); 2492 2493 obj->userfault_count = 0; 2494 list_del(&obj->userfault_link); 2495 #ifdef __linux__ 2496 drm_vma_node_unmap(&obj->base.vma_node, 2497 obj->base.dev->anon_inode->i_mapping); 2498 #else 2499 if (drm_vma_node_has_offset(&obj->base.vma_node)) { 2500 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2501 struct i915_vma *vma; 2502 struct vm_page *pg; 2503 2504 for_each_ggtt_vma(vma, obj) { 2505 for (pg = &dev_priv->pgs[atop(vma->node.start)]; 2506 pg != &dev_priv->pgs[atop(vma->node.start + vma->size)]; 2507 pg++) 2508 pmap_page_protect(pg, PROT_NONE); 2509 } 2510 } 2511 #endif 2512 2513 for_each_ggtt_vma(vma, obj) 2514 i915_vma_unset_userfault(vma); 2515 } 2516 2517 /** 2518 * i915_gem_release_mmap - remove physical page mappings 2519 * @obj: obj in question 2520 * 2521 * Preserve the reservation of the mmapping with the DRM core code, but 2522 * relinquish ownership of the pages back to the system. 2523 * 2524 * It is vital that we remove the page mapping if we have mapped a tiled 2525 * object through the GTT and then lose the fence register due to 2526 * resource pressure. Similarly if the object has been moved out of the 2527 * aperture, then the pages mapped into userspace must be revoked. Removing the 2528 * mapping will then trigger a page fault on the next user access, allowing 2529 * fixup by i915_gem_fault(). 2530 */ 2531 void 2532 i915_gem_release_mmap(struct drm_i915_gem_object *obj) 2533 { 2534 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2535 2536 /* Serialisation between user GTT access and our code depends upon 2537 * revoking the CPU's PTE whilst the mutex is held. The next user 2538 * pagefault then has to wait until we release the mutex. 2539 * 2540 * Note that RPM complicates this somewhat by adding an additional 2541 * requirement that operations to the GGTT be made holding the RPM 2542 * wakeref. 2543 */ 2544 lockdep_assert_held(&i915->drm.struct_mutex); 2545 intel_runtime_pm_get(i915); 2546 2547 if (!obj->userfault_count) 2548 goto out; 2549 2550 __i915_gem_object_release_mmap(obj); 2551 2552 /* Ensure that the CPU's PTEs are revoked and there are no outstanding 2553 * memory transactions from userspace before we return. The TLB 2554 * flushing implied by changing the PTE above *should* be 2555 * sufficient; an extra barrier here just provides us with a bit 2556 * of paranoid documentation about our requirement to serialise 2557 * memory writes before touching registers / GSM. 2558 */ 2559 wmb(); 2560 2561 out: 2562 intel_runtime_pm_put(i915); 2563 } 2564 2565 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) 2566 { 2567 struct drm_i915_gem_object *obj, *on; 2568 int i; 2569 2570 /* 2571 * Only called during RPM suspend. All users of the userfault_list 2572 * must be holding an RPM wakeref to ensure that this can not 2573 * run concurrently with themselves (and use the struct_mutex for 2574 * protection between themselves).
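 *
 * Illustrative sketch only, mirroring the pattern already used by the
 * fault handlers above; this is what "holding an RPM wakeref" looks like
 * for a hypothetical user of the userfault_list:
 *
 *	intel_runtime_pm_get(i915);
 *	mutex_lock(&i915->drm.struct_mutex);
 *	... walk or modify i915->mm.userfault_list ...
 *	mutex_unlock(&i915->drm.struct_mutex);
 *	intel_runtime_pm_put(i915);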
2575 */ 2576 2577 list_for_each_entry_safe(obj, on, 2578 &dev_priv->mm.userfault_list, userfault_link) 2579 __i915_gem_object_release_mmap(obj); 2580 2581 /* The fence will be lost when the device powers down. If any were 2582 * in use by hardware (i.e. they are pinned), we should not be powering 2583 * down! All other fences will be reacquired by the user upon waking. 2584 */ 2585 for (i = 0; i < dev_priv->num_fence_regs; i++) { 2586 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 2587 2588 /* Ideally we want to assert that the fence register is not 2589 * live at this point (i.e. that no piece of code will be 2590 * trying to write through fence + GTT, as that both violates 2591 * our tracking of activity and associated locking/barriers, 2592 * but also is illegal given that the hw is powered down). 2593 * 2594 * Previously we used reg->pin_count as a "liveness" indicator. 2595 * That is not sufficient, and we need a more fine-grained 2596 * tool if we want to have a sanity check here. 2597 */ 2598 2599 if (!reg->vma) 2600 continue; 2601 2602 GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); 2603 reg->dirty = true; 2604 } 2605 } 2606 2607 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) 2608 { 2609 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2610 int err; 2611 2612 err = drm_gem_create_mmap_offset(&obj->base); 2613 if (likely(!err)) 2614 return 0; 2615 2616 /* Attempt to reap some mmap space from dead objects */ 2617 do { 2618 err = i915_gem_wait_for_idle(dev_priv, 2619 I915_WAIT_INTERRUPTIBLE, 2620 MAX_SCHEDULE_TIMEOUT); 2621 if (err) 2622 break; 2623 2624 i915_gem_drain_freed_objects(dev_priv); 2625 err = drm_gem_create_mmap_offset(&obj->base); 2626 if (!err) 2627 break; 2628 2629 } while (flush_delayed_work(&dev_priv->gt.retire_work)); 2630 2631 return err; 2632 } 2633 2634 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) 2635 { 2636 drm_gem_free_mmap_offset(&obj->base); 2637 } 2638 2639 int 2640 i915_gem_mmap_gtt(struct drm_file *file, 2641 struct drm_device *dev, 2642 uint32_t handle, 2643 uint64_t *offset) 2644 { 2645 struct drm_i915_gem_object *obj; 2646 int ret; 2647 2648 obj = i915_gem_object_lookup(file, handle); 2649 if (!obj) 2650 return -ENOENT; 2651 2652 ret = i915_gem_object_create_mmap_offset(obj); 2653 if (ret == 0) 2654 *offset = drm_vma_node_offset_addr(&obj->base.vma_node); 2655 2656 i915_gem_object_put(obj); 2657 return ret; 2658 } 2659 2660 /** 2661 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing 2662 * @dev: DRM device 2663 * @data: GTT mapping ioctl data 2664 * @file: GEM object info 2665 * 2666 * Simply returns the fake offset to userspace so it can mmap it. 2667 * The mmap call will end up in drm_gem_mmap(), which will set things 2668 * up so we can get faults in the handler above. 2669 * 2670 * The fault handler will take care of binding the object into the GTT 2671 * (since it may have been evicted to make room for something), allocating 2672 * a fence register, and mapping the appropriate aperture address into 2673 * userspace. 
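 *
 * A rough userspace sketch of that flow (assumed names, no error handling):
 * fetch the fake offset here, then mmap the DRM fd with it so that faults
 * are serviced by the handler above:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
 *	void *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, arg.offset);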
2674 */ 2675 int 2676 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, 2677 struct drm_file *file) 2678 { 2679 struct drm_i915_gem_mmap_gtt *args = data; 2680 2681 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 2682 } 2683 2684 /* Immediately discard the backing storage */ 2685 static void 2686 i915_gem_object_truncate(struct drm_i915_gem_object *obj) 2687 { 2688 i915_gem_object_free_mmap_offset(obj); 2689 2690 if (obj->base.filp == NULL) 2691 return; 2692 2693 /* Our goal here is to return as much of the memory as 2694 * is possible back to the system as we are called from OOM. 2695 * To do this we must instruct the shmfs to drop all of its 2696 * backing pages, *now*. 2697 */ 2698 #ifdef __linux__ 2699 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); 2700 #else 2701 obj->base.uao->pgops->pgo_flush(obj->base.uao, 0, obj->base.size, 2702 PGO_ALLPAGES | PGO_FREE); 2703 #endif 2704 obj->mm.madv = __I915_MADV_PURGED; 2705 obj->mm.pages = ERR_PTR(-EFAULT); 2706 } 2707 2708 /* Try to discard unwanted pages */ 2709 void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) 2710 { 2711 #ifdef __linux__ 2712 struct address_space *mapping; 2713 #endif 2714 2715 lockdep_assert_held(&obj->mm.lock); 2716 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 2717 2718 switch (obj->mm.madv) { 2719 case I915_MADV_DONTNEED: 2720 i915_gem_object_truncate(obj); 2721 case __I915_MADV_PURGED: 2722 return; 2723 } 2724 2725 if (obj->base.filp == NULL) 2726 return; 2727 2728 #ifdef __linux__ 2729 mapping = obj->base.filp->f_mapping, 2730 invalidate_mapping_pages(mapping, 0, (loff_t)-1); 2731 #endif 2732 } 2733 2734 static void 2735 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, 2736 struct sg_table *pages) 2737 { 2738 struct sgt_iter sgt_iter; 2739 struct vm_page *page; 2740 2741 __i915_gem_object_release_shmem(obj, pages, true); 2742 2743 i915_gem_gtt_finish_pages(obj, pages); 2744 2745 if (i915_gem_object_needs_bit17_swizzle(obj)) 2746 i915_gem_object_save_bit_17_swizzle(obj, pages); 2747 2748 for_each_sgt_page(page, sgt_iter, pages) { 2749 if (obj->mm.dirty) 2750 set_page_dirty(page); 2751 2752 #ifdef __linux__ 2753 if (obj->mm.madv == I915_MADV_WILLNEED) 2754 mark_page_accessed(page); 2755 2756 put_page(page); 2757 #endif 2758 } 2759 #ifdef __OpenBSD__ 2760 uvm_objunwire(obj->base.uao, 0, obj->base.size); 2761 #endif 2762 obj->mm.dirty = false; 2763 2764 sg_free_table(pages); 2765 kfree(pages); 2766 } 2767 2768 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) 2769 { 2770 struct radix_tree_iter iter; 2771 void __rcu **slot; 2772 2773 rcu_read_lock(); 2774 radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) 2775 radix_tree_delete(&obj->mm.get_page.radix, iter.index); 2776 rcu_read_unlock(); 2777 } 2778 2779 static struct sg_table * 2780 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) 2781 { 2782 struct drm_i915_private *i915 = to_i915(obj->base.dev); 2783 struct sg_table *pages; 2784 2785 pages = fetch_and_zero(&obj->mm.pages); 2786 if (!pages) 2787 return NULL; 2788 2789 spin_lock(&i915->mm.obj_lock); 2790 list_del(&obj->mm.link); 2791 spin_unlock(&i915->mm.obj_lock); 2792 2793 if (obj->mm.mapping) { 2794 void *ptr; 2795 2796 ptr = page_mask_bits(obj->mm.mapping); 2797 if (is_vmalloc_addr(ptr)) 2798 vunmap(ptr, obj->base.size); 2799 else 2800 kunmap(kmap_to_page(ptr)); 2801 2802 obj->mm.mapping = NULL; 2803 } 2804 2805 __i915_gem_object_reset_page_iter(obj); 2806 obj->mm.page_sizes.phys = 
obj->mm.page_sizes.sg = 0; 2807 2808 return pages; 2809 } 2810 2811 void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, 2812 enum i915_mm_subclass subclass) 2813 { 2814 struct sg_table *pages; 2815 2816 if (i915_gem_object_has_pinned_pages(obj)) 2817 return; 2818 2819 GEM_BUG_ON(obj->bind_count); 2820 if (!i915_gem_object_has_pages(obj)) 2821 return; 2822 2823 /* May be called by shrinker from within get_pages() (on another bo) */ 2824 mutex_lock_nested(&obj->mm.lock, subclass); 2825 if (unlikely(atomic_read(&obj->mm.pages_pin_count))) 2826 goto unlock; 2827 2828 /* 2829 * ->put_pages might need to allocate memory for the bit17 swizzle 2830 * array, hence protect them from being reaped by removing them from gtt 2831 * lists early. 2832 */ 2833 pages = __i915_gem_object_unset_pages(obj); 2834 if (!IS_ERR(pages)) 2835 obj->ops->put_pages(obj, pages); 2836 2837 unlock: 2838 mutex_unlock(&obj->mm.lock); 2839 } 2840 2841 static bool i915_sg_trim(struct sg_table *orig_st) 2842 { 2843 struct sg_table new_st; 2844 struct scatterlist *sg, *new_sg; 2845 unsigned int i; 2846 2847 if (orig_st->nents == orig_st->orig_nents) 2848 return false; 2849 2850 if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) 2851 return false; 2852 2853 new_sg = new_st.sgl; 2854 for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { 2855 sg_set_page(new_sg, sg_page(sg), sg->length, 0); 2856 /* called before being DMA mapped, no need to copy sg->dma_* */ 2857 new_sg = sg_next(new_sg); 2858 } 2859 GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */ 2860 2861 sg_free_table(orig_st); 2862 2863 *orig_st = new_st; 2864 return true; 2865 } 2866 2867 static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) 2868 { 2869 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2870 const unsigned long page_count = obj->base.size / PAGE_SIZE; 2871 unsigned long i; 2872 struct address_space *mapping; 2873 struct sg_table *st; 2874 struct scatterlist *sg; 2875 struct sgt_iter sgt_iter; 2876 struct pglist plist; 2877 struct vm_page *page; 2878 unsigned long last_pfn = 0; /* suppress gcc warning */ 2879 unsigned int max_segment = i915_sg_segment_size(); 2880 unsigned int sg_page_sizes; 2881 gfp_t noreclaim; 2882 int ret; 2883 2884 /* Assert that the object is not currently in any GPU domain. As it 2885 * wasn't in the GTT, there shouldn't be any way it could have been in 2886 * a GPU cache 2887 */ 2888 GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); 2889 GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); 2890 2891 st = kmalloc(sizeof(*st), GFP_KERNEL); 2892 if (st == NULL) 2893 return -ENOMEM; 2894 2895 rebuild_st: 2896 if (sg_alloc_table(st, page_count, GFP_KERNEL)) { 2897 kfree(st); 2898 return -ENOMEM; 2899 } 2900 2901 #ifdef __linux__ 2902 /* Get the list of pages out of our struct file. They'll be pinned 2903 * at this point until we release them. 
2904 * 2905 * Fail silently without starting the shrinker 2906 */ 2907 mapping = obj->base.filp->f_mapping; 2908 noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM); 2909 noreclaim |= __GFP_NORETRY | __GFP_NOWARN; 2910 2911 sg = st->sgl; 2912 st->nents = 0; 2913 sg_page_sizes = 0; 2914 for (i = 0; i < page_count; i++) { 2915 const unsigned int shrink[] = { 2916 I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, 2917 0, 2918 }, *s = shrink; 2919 gfp_t gfp = noreclaim; 2920 2921 do { 2922 page = shmem_read_mapping_page_gfp(mapping, i, gfp); 2923 if (likely(!IS_ERR(page))) 2924 break; 2925 2926 if (!*s) { 2927 ret = PTR_ERR(page); 2928 goto err_sg; 2929 } 2930 2931 i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 2932 cond_resched(); 2933 2934 /* We've tried hard to allocate the memory by reaping 2935 * our own buffer, now let the real VM do its job and 2936 * go down in flames if truly OOM. 2937 * 2938 * However, since graphics tend to be disposable, 2939 * defer the oom here by reporting the ENOMEM back 2940 * to userspace. 2941 */ 2942 if (!*s) { 2943 /* reclaim and warn, but no oom */ 2944 gfp = mapping_gfp_mask(mapping); 2945 2946 /* Our bo are always dirty and so we require 2947 * kswapd to reclaim our pages (direct reclaim 2948 * does not effectively begin pageout of our 2949 * buffers on its own). However, direct reclaim 2950 * only waits for kswapd when under allocation 2951 * congestion. So as a result __GFP_RECLAIM is 2952 * unreliable and fails to actually reclaim our 2953 * dirty pages -- unless you try over and over 2954 * again with !__GFP_NORETRY. However, we still 2955 * want to fail this allocation rather than 2956 * trigger the out-of-memory killer and for 2957 * this we want __GFP_RETRY_MAYFAIL. 2958 */ 2959 gfp |= __GFP_RETRY_MAYFAIL; 2960 } 2961 } while (1); 2962 2963 if (!i || 2964 sg->length >= max_segment || 2965 page_to_pfn(page) != last_pfn + 1) { 2966 if (i) { 2967 sg_page_sizes |= sg->length; 2968 sg = sg_next(sg); 2969 } 2970 st->nents++; 2971 sg_set_page(sg, page, PAGE_SIZE, 0); 2972 } else { 2973 sg->length += PAGE_SIZE; 2974 } 2975 last_pfn = page_to_pfn(page); 2976 2977 /* Check that the i965g/gm workaround works. */ 2978 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); 2979 } 2980 #else 2981 sg = st->sgl; 2982 st->nents = 0; 2983 sg_page_sizes = 0; 2984 2985 TAILQ_INIT(&plist); 2986 if (uvm_objwire(obj->base.uao, 0, obj->base.size, &plist)) { 2987 ret = -ENOMEM; 2988 goto err_pages; 2989 } 2990 2991 i = 0; 2992 TAILQ_FOREACH(page, &plist, pageq) { 2993 if (i) { 2994 sg_page_sizes |= sg->length; 2995 sg = sg_next(sg); 2996 } 2997 st->nents++; 2998 sg_set_page(sg, page, PAGE_SIZE, 0); 2999 i++; 3000 } 3001 #endif 3002 if (sg) { /* loop terminated early; short sg table */ 3003 sg_page_sizes |= sg->length; 3004 sg_mark_end(sg); 3005 } 3006 3007 /* Trim unused sg entries to avoid wasting memory. */ 3008 i915_sg_trim(st); 3009 3010 ret = i915_gem_gtt_prepare_pages(obj, st); 3011 if (ret) { 3012 /* DMA remapping failed? One possible cause is that 3013 * it could not reserve enough large entries, asking 3014 * for PAGE_SIZE chunks instead may be helpful. 
3015 */ 3016 if (max_segment > PAGE_SIZE) { 3017 #ifdef __linux__ 3018 for_each_sgt_page(page, sgt_iter, st) 3019 put_page(page); 3020 #else 3021 uvm_objunwire(obj->base.uao, 0, obj->base.size); 3022 #endif 3023 sg_free_table(st); 3024 3025 max_segment = PAGE_SIZE; 3026 goto rebuild_st; 3027 } else { 3028 dev_warn(&dev_priv->drm.pdev->dev, 3029 "Failed to DMA remap %lu pages\n", 3030 page_count); 3031 goto err_pages; 3032 } 3033 } 3034 3035 if (i915_gem_object_needs_bit17_swizzle(obj)) 3036 i915_gem_object_do_bit_17_swizzle(obj, st); 3037 3038 __i915_gem_object_set_pages(obj, st, sg_page_sizes); 3039 3040 return 0; 3041 3042 #ifdef __linux__ 3043 err_sg: 3044 #endif 3045 sg_mark_end(sg); 3046 err_pages: 3047 #ifdef __linux__ 3048 for_each_sgt_page(page, sgt_iter, st) 3049 put_page(page); 3050 #else 3051 uvm_objunwire(obj->base.uao, 0, obj->base.size); 3052 #endif 3053 sg_free_table(st); 3054 kfree(st); 3055 3056 /* shmemfs first checks if there is enough memory to allocate the page 3057 * and reports ENOSPC should there be insufficient, along with the usual 3058 * ENOMEM for a genuine allocation failure. 3059 * 3060 * We use ENOSPC in our driver to mean that we have run out of aperture 3061 * space and so want to translate the error from shmemfs back to our 3062 * usual understanding of ENOMEM. 3063 */ 3064 if (ret == -ENOSPC) 3065 ret = -ENOMEM; 3066 3067 return ret; 3068 } 3069 3070 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, 3071 struct sg_table *pages, 3072 unsigned int sg_page_sizes) 3073 { 3074 struct drm_i915_private *i915 = to_i915(obj->base.dev); 3075 unsigned long supported = INTEL_INFO(i915)->page_sizes; 3076 int i; 3077 3078 lockdep_assert_held(&obj->mm.lock); 3079 3080 obj->mm.get_page.sg_pos = pages->sgl; 3081 obj->mm.get_page.sg_idx = 0; 3082 3083 obj->mm.pages = pages; 3084 3085 if (i915_gem_object_is_tiled(obj) && 3086 i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 3087 GEM_BUG_ON(obj->mm.quirked); 3088 __i915_gem_object_pin_pages(obj); 3089 obj->mm.quirked = true; 3090 } 3091 3092 GEM_BUG_ON(!sg_page_sizes); 3093 obj->mm.page_sizes.phys = sg_page_sizes; 3094 3095 /* 3096 * Calculate the supported page-sizes which fit into the given 3097 * sg_page_sizes. This will give us the page-sizes which we may be able 3098 * to use opportunistically when later inserting into the GTT. For 3099 * example if phys=2G, then in theory we should be able to use 1G, 2M, 3100 * 64K or 4K pages, although in practice this will depend on a number of 3101 * other factors. 3102 */ 3103 obj->mm.page_sizes.sg = 0; 3104 for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 3105 if (obj->mm.page_sizes.phys & ~0u << i) 3106 obj->mm.page_sizes.sg |= BIT(i); 3107 } 3108 GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); 3109 3110 spin_lock(&i915->mm.obj_lock); 3111 list_add(&obj->mm.link, &i915->mm.unbound_list); 3112 spin_unlock(&i915->mm.obj_lock); 3113 } 3114 3115 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 3116 { 3117 int err; 3118 3119 if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { 3120 DRM_DEBUG("Attempting to obtain a purgeable object\n"); 3121 return -EFAULT; 3122 } 3123 3124 err = obj->ops->get_pages(obj); 3125 GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); 3126 3127 return err; 3128 } 3129 3130 /* Ensure that the associated pages are gathered from the backing storage 3131 * and pinned into our object. 
i915_gem_object_pin_pages() may be called 3132 * multiple times before they are released by a single call to 3133 * i915_gem_object_unpin_pages() - once the pages are no longer referenced 3134 * either as a result of memory pressure (reaping pages under the shrinker) 3135 * or as the object is itself released. 3136 */ 3137 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) 3138 { 3139 int err; 3140 3141 err = mutex_lock_interruptible(&obj->mm.lock); 3142 if (err) 3143 return err; 3144 3145 if (unlikely(!i915_gem_object_has_pages(obj))) { 3146 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 3147 3148 err = ____i915_gem_object_get_pages(obj); 3149 if (err) 3150 goto unlock; 3151 3152 smp_mb__before_atomic(); 3153 } 3154 atomic_inc(&obj->mm.pages_pin_count); 3155 3156 unlock: 3157 mutex_unlock(&obj->mm.lock); 3158 return err; 3159 } 3160 3161 /* The 'mapping' part of i915_gem_object_pin_map() below */ 3162 static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, 3163 enum i915_map_type type) 3164 { 3165 unsigned long n_pages = obj->base.size >> PAGE_SHIFT; 3166 struct sg_table *sgt = obj->mm.pages; 3167 struct sgt_iter sgt_iter; 3168 struct vm_page *page; 3169 struct vm_page *stack_pages[32]; 3170 struct vm_page **pages = stack_pages; 3171 unsigned long i = 0; 3172 pgprot_t pgprot; 3173 void *addr; 3174 3175 #if 0 3176 /* A single page can always be kmapped */ 3177 if (n_pages == 1 && type == I915_MAP_WB) 3178 return kmap(sg_page(sgt->sgl)); 3179 #endif 3180 3181 if (n_pages > ARRAY_SIZE(stack_pages)) { 3182 /* Too big for stack -- allocate temporary array instead */ 3183 pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); 3184 if (!pages) 3185 return NULL; 3186 } 3187 3188 for_each_sgt_page(page, sgt_iter, sgt) 3189 pages[i++] = page; 3190 3191 /* Check that we have the expected number of pages */ 3192 GEM_BUG_ON(i != n_pages); 3193 3194 switch (type) { 3195 default: 3196 MISSING_CASE(type); 3197 /* fallthrough to use PAGE_KERNEL anyway */ 3198 case I915_MAP_WB: 3199 pgprot = PAGE_KERNEL; 3200 break; 3201 case I915_MAP_WC: 3202 pgprot = pgprot_writecombine(PAGE_KERNEL_IO); 3203 break; 3204 } 3205 addr = vmap(pages, n_pages, 0, pgprot); 3206 3207 if (pages != stack_pages) 3208 kvfree(pages); 3209 3210 return addr; 3211 } 3212 3213 /* get, pin, and map the pages of the object into kernel space */ 3214 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, 3215 enum i915_map_type type) 3216 { 3217 enum i915_map_type has_type; 3218 bool pinned; 3219 void *ptr; 3220 int ret; 3221 3222 if (unlikely(!i915_gem_object_has_struct_page(obj))) 3223 return ERR_PTR(-ENXIO); 3224 3225 ret = mutex_lock_interruptible(&obj->mm.lock); 3226 if (ret) 3227 return ERR_PTR(ret); 3228 3229 pinned = !(type & I915_MAP_OVERRIDE); 3230 type &= ~I915_MAP_OVERRIDE; 3231 3232 if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { 3233 if (unlikely(!i915_gem_object_has_pages(obj))) { 3234 GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); 3235 3236 ret = ____i915_gem_object_get_pages(obj); 3237 if (ret) 3238 goto err_unlock; 3239 3240 smp_mb__before_atomic(); 3241 } 3242 atomic_inc(&obj->mm.pages_pin_count); 3243 pinned = false; 3244 } 3245 GEM_BUG_ON(!i915_gem_object_has_pages(obj)); 3246 3247 ptr = page_unpack_bits(obj->mm.mapping, &has_type); 3248 if (ptr && has_type != type) { 3249 if (pinned) { 3250 ret = -EBUSY; 3251 goto err_unpin; 3252 } 3253 3254 if (is_vmalloc_addr(ptr)) 3255 vunmap(ptr, obj->base.size); 3256 else 3257 kunmap(kmap_to_page(ptr)); 3258 3259 ptr = 
obj->mm.mapping = NULL; 3260 } 3261 3262 if (!ptr) { 3263 ptr = i915_gem_object_map(obj, type); 3264 if (!ptr) { 3265 ret = -ENOMEM; 3266 goto err_unpin; 3267 } 3268 3269 obj->mm.mapping = page_pack_bits(ptr, type); 3270 } 3271 3272 out_unlock: 3273 mutex_unlock(&obj->mm.lock); 3274 return ptr; 3275 3276 err_unpin: 3277 atomic_dec(&obj->mm.pages_pin_count); 3278 err_unlock: 3279 ptr = ERR_PTR(ret); 3280 goto out_unlock; 3281 } 3282 3283 static int 3284 i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, 3285 const struct drm_i915_gem_pwrite *arg) 3286 { 3287 #ifdef __linux__ 3288 struct address_space *mapping = obj->base.filp->f_mapping; 3289 #endif 3290 char __user *user_data = u64_to_user_ptr(arg->data_ptr); 3291 u64 remain, offset; 3292 unsigned int pg; 3293 3294 /* Before we instantiate/pin the backing store for our use, we 3295 * can prepopulate the shmemfs filp efficiently using a write into 3296 * the pagecache. We avoid the penalty of instantiating all the 3297 * pages, important if the user is just writing to a few and never 3298 * uses the object on the GPU, and using a direct write into shmemfs 3299 * allows it to avoid the cost of retrieving a page (either swapin 3300 * or clearing-before-use) before it is overwritten. 3301 */ 3302 if (i915_gem_object_has_pages(obj)) 3303 return -ENODEV; 3304 3305 if (obj->mm.madv != I915_MADV_WILLNEED) 3306 return -EFAULT; 3307 3308 /* Before the pages are instantiated the object is treated as being 3309 * in the CPU domain. The pages will be clflushed as required before 3310 * use, and we can freely write into the pages directly. If userspace 3311 * races pwrite with any other operation; corruption will ensue - 3312 * that is userspace's prerogative! 3313 */ 3314 3315 remain = arg->size; 3316 offset = arg->offset; 3317 pg = offset_in_page(offset); 3318 3319 do { 3320 unsigned int len, unwritten; 3321 struct vm_page *page; 3322 void *data, *vaddr; 3323 int err; 3324 3325 len = PAGE_SIZE - pg; 3326 if (len > remain) 3327 len = remain; 3328 3329 #ifdef __linux__ 3330 err = pagecache_write_begin(obj->base.filp, mapping, 3331 offset, len, 0, 3332 &page, &data); 3333 if (err < 0) 3334 return err; 3335 #else 3336 struct pglist plist; 3337 TAILQ_INIT(&plist); 3338 if (uvm_objwire(obj->base.uao, trunc_page(offset), 3339 trunc_page(offset) + PAGE_SIZE, &plist)) { 3340 err = -ENOMEM; 3341 return err; 3342 } 3343 page = TAILQ_FIRST(&plist); 3344 #endif 3345 3346 vaddr = kmap(page); 3347 unwritten = copy_from_user(vaddr + pg, user_data, len); 3348 kunmap(vaddr); 3349 3350 #ifdef __linux__ 3351 err = pagecache_write_end(obj->base.filp, mapping, 3352 offset, len, len - unwritten, 3353 page, data); 3354 if (err < 0) 3355 return err; 3356 #else 3357 uvm_objunwire(obj->base.uao, trunc_page(offset), 3358 trunc_page(offset) + PAGE_SIZE); 3359 #endif 3360 3361 if (unwritten) 3362 return -EFAULT; 3363 3364 remain -= len; 3365 user_data += len; 3366 offset += len; 3367 pg = 0; 3368 } while (remain); 3369 3370 return 0; 3371 } 3372 3373 static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, 3374 const struct i915_gem_context *ctx) 3375 { 3376 unsigned int score; 3377 unsigned long prev_hang; 3378 3379 if (i915_gem_context_is_banned(ctx)) 3380 score = I915_CLIENT_SCORE_CONTEXT_BAN; 3381 else 3382 score = 0; 3383 3384 prev_hang = xchg(&file_priv->hang_timestamp, jiffies); 3385 if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) 3386 score += I915_CLIENT_SCORE_HANG_FAST; 3387 3388 if (score) { 3389 atomic_add(score, 
&file_priv->ban_score); 3390 3391 DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", 3392 ctx->name, score, 3393 atomic_read(&file_priv->ban_score)); 3394 } 3395 } 3396 3397 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) 3398 { 3399 unsigned int score; 3400 bool banned, bannable; 3401 3402 atomic_inc(&ctx->guilty_count); 3403 3404 bannable = i915_gem_context_is_bannable(ctx); 3405 score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); 3406 banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; 3407 3408 /* Cool contexts don't accumulate client ban score */ 3409 if (!bannable) 3410 return; 3411 3412 if (banned) { 3413 DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n", 3414 ctx->name, atomic_read(&ctx->guilty_count), 3415 score); 3416 i915_gem_context_set_banned(ctx); 3417 } 3418 3419 if (!IS_ERR_OR_NULL(ctx->file_priv)) 3420 i915_gem_client_mark_guilty(ctx->file_priv, ctx); 3421 } 3422 3423 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) 3424 { 3425 atomic_inc(&ctx->active_count); 3426 } 3427 3428 struct i915_request * 3429 i915_gem_find_active_request(struct intel_engine_cs *engine) 3430 { 3431 struct i915_request *request, *active = NULL; 3432 unsigned long flags; 3433 3434 /* 3435 * We are called by the error capture, reset and to dump engine 3436 * state at random points in time. In particular, note that neither is 3437 * crucially ordered with an interrupt. After a hang, the GPU is dead 3438 * and we assume that no more writes can happen (we waited long enough 3439 * for all writes that were in transaction to be flushed) - adding an 3440 * extra delay for a recent interrupt is pointless. Hence, we do 3441 * not need an engine->irq_seqno_barrier() before the seqno reads. 3442 * At all other times, we must assume the GPU is still running, but 3443 * we only care about the snapshot of this moment. 3444 */ 3445 spin_lock_irqsave(&engine->timeline.lock, flags); 3446 list_for_each_entry(request, &engine->timeline.requests, link) { 3447 if (__i915_request_completed(request, request->global_seqno)) 3448 continue; 3449 3450 active = request; 3451 break; 3452 } 3453 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3454 3455 return active; 3456 } 3457 3458 /* 3459 * Ensure irq handler finishes, and not run again. 3460 * Also return the active request so that we only search for it once. 3461 */ 3462 struct i915_request * 3463 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) 3464 { 3465 struct i915_request *request; 3466 3467 /* 3468 * During the reset sequence, we must prevent the engine from 3469 * entering RC6. As the context state is undefined until we restart 3470 * the engine, if it does enter RC6 during the reset, the state 3471 * written to the powercontext is undefined and so we may lose 3472 * GPU state upon resume, i.e. fail to restart after a reset. 3473 */ 3474 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); 3475 3476 request = engine->reset.prepare(engine); 3477 if (request && request->fence.error == -EIO) 3478 request = ERR_PTR(-EIO); /* Previous reset failed! 
*/ 3479 3480 return request; 3481 } 3482 3483 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) 3484 { 3485 struct intel_engine_cs *engine; 3486 struct i915_request *request; 3487 enum intel_engine_id id; 3488 int err = 0; 3489 3490 for_each_engine(engine, dev_priv, id) { 3491 request = i915_gem_reset_prepare_engine(engine); 3492 if (IS_ERR(request)) { 3493 err = PTR_ERR(request); 3494 continue; 3495 } 3496 3497 engine->hangcheck.active_request = request; 3498 } 3499 3500 i915_gem_revoke_fences(dev_priv); 3501 intel_uc_sanitize(dev_priv); 3502 3503 return err; 3504 } 3505 3506 static void engine_skip_context(struct i915_request *request) 3507 { 3508 struct intel_engine_cs *engine = request->engine; 3509 struct i915_gem_context *hung_ctx = request->gem_context; 3510 struct i915_timeline *timeline = request->timeline; 3511 unsigned long flags; 3512 3513 GEM_BUG_ON(timeline == &engine->timeline); 3514 3515 spin_lock_irqsave(&engine->timeline.lock, flags); 3516 spin_lock(&timeline->lock); 3517 3518 list_for_each_entry_continue(request, &engine->timeline.requests, link) 3519 if (request->gem_context == hung_ctx) 3520 i915_request_skip(request, -EIO); 3521 3522 list_for_each_entry(request, &timeline->requests, link) 3523 i915_request_skip(request, -EIO); 3524 3525 spin_unlock(&timeline->lock); 3526 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3527 } 3528 3529 /* Returns the request if it was guilty of the hang */ 3530 static struct i915_request * 3531 i915_gem_reset_request(struct intel_engine_cs *engine, 3532 struct i915_request *request, 3533 bool stalled) 3534 { 3535 /* The guilty request will get skipped on a hung engine. 3536 * 3537 * Users of client default contexts do not rely on logical 3538 * state preserved between batches so it is safe to execute 3539 * queued requests following the hang. Non default contexts 3540 * rely on preserved state, so skipping a batch loses the 3541 * evolution of the state and it needs to be considered corrupted. 3542 * Executing more queued batches on top of corrupted state is 3543 * risky. But we take the risk by trying to advance through 3544 * the queued requests in order to make the client behaviour 3545 * more predictable around resets, by not throwing away random 3546 * amount of batches it has prepared for execution. Sophisticated 3547 * clients can use gem_reset_stats_ioctl and dma fence status 3548 * (exported via sync_file info ioctl on explicit fences) to observe 3549 * when it loses the context state and should rebuild accordingly. 3550 * 3551 * The context ban, and ultimately the client ban, mechanism are safety 3552 * valves if client submission ends up resulting in nothing more than 3553 * subsequent hangs. 3554 */ 3555 3556 if (i915_request_completed(request)) { 3557 GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", 3558 engine->name, request->global_seqno, 3559 request->fence.context, request->fence.seqno, 3560 intel_engine_get_seqno(engine)); 3561 stalled = false; 3562 } 3563 3564 if (stalled) { 3565 i915_gem_context_mark_guilty(request->gem_context); 3566 i915_request_skip(request, -EIO); 3567 3568 /* If this context is now banned, skip all pending requests. */ 3569 if (i915_gem_context_is_banned(request->gem_context)) 3570 engine_skip_context(request); 3571 } else { 3572 /* 3573 * Since this is not the hung engine, it may have advanced 3574 * since the hang declaration. Double check by refinding 3575 * the active request at the time of the reset. 
3576 */ 3577 request = i915_gem_find_active_request(engine); 3578 if (request) { 3579 unsigned long flags; 3580 3581 i915_gem_context_mark_innocent(request->gem_context); 3582 dma_fence_set_error(&request->fence, -EAGAIN); 3583 3584 /* Rewind the engine to replay the incomplete rq */ 3585 spin_lock_irqsave(&engine->timeline.lock, flags); 3586 request = list_prev_entry(request, link); 3587 if (&request->link == &engine->timeline.requests) 3588 request = NULL; 3589 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3590 } 3591 } 3592 3593 return request; 3594 } 3595 3596 void i915_gem_reset_engine(struct intel_engine_cs *engine, 3597 struct i915_request *request, 3598 bool stalled) 3599 { 3600 /* 3601 * Make sure this write is visible before we re-enable the interrupt 3602 * handlers on another CPU, as tasklet_enable() resolves to just 3603 * a compiler barrier which is insufficient for our purpose here. 3604 */ 3605 smp_store_mb(engine->irq_posted, 0); 3606 3607 if (request) 3608 request = i915_gem_reset_request(engine, request, stalled); 3609 3610 /* Set up the CS to resume from the breadcrumb of the hung request */ 3611 engine->reset.reset(engine, request); 3612 } 3613 3614 void i915_gem_reset(struct drm_i915_private *dev_priv, 3615 unsigned int stalled_mask) 3616 { 3617 struct intel_engine_cs *engine; 3618 enum intel_engine_id id; 3619 3620 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3621 3622 i915_retire_requests(dev_priv); 3623 3624 for_each_engine(engine, dev_priv, id) { 3625 struct intel_context *ce; 3626 3627 i915_gem_reset_engine(engine, 3628 engine->hangcheck.active_request, 3629 stalled_mask & ENGINE_MASK(id)); 3630 ce = fetch_and_zero(&engine->last_retired_context); 3631 if (ce) 3632 intel_context_unpin(ce); 3633 3634 /* 3635 * Ostensibly, we always want a context loaded for powersaving, 3636 * so if the engine is idle after the reset, send a request 3637 * to load our scratch kernel_context. 3638 * 3639 * More mysteriously, if we leave the engine idle after a reset, 3640 * the next userspace batch may hang, with what appears to be 3641 * an incoherent read by the CS (presumably stale TLB). An 3642 * empty request appears sufficient to paper over the glitch.
3643 */ 3644 if (intel_engine_is_idle(engine)) { 3645 struct i915_request *rq; 3646 3647 rq = i915_request_alloc(engine, 3648 dev_priv->kernel_context); 3649 if (!IS_ERR(rq)) 3650 i915_request_add(rq); 3651 } 3652 } 3653 3654 i915_gem_restore_fences(dev_priv); 3655 } 3656 3657 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) 3658 { 3659 engine->reset.finish(engine); 3660 3661 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); 3662 } 3663 3664 void i915_gem_reset_finish(struct drm_i915_private *dev_priv) 3665 { 3666 struct intel_engine_cs *engine; 3667 enum intel_engine_id id; 3668 3669 lockdep_assert_held(&dev_priv->drm.struct_mutex); 3670 3671 for_each_engine(engine, dev_priv, id) { 3672 engine->hangcheck.active_request = NULL; 3673 i915_gem_reset_finish_engine(engine); 3674 } 3675 } 3676 3677 static void nop_submit_request(struct i915_request *request) 3678 { 3679 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3680 request->engine->name, 3681 request->fence.context, request->fence.seqno); 3682 dma_fence_set_error(&request->fence, -EIO); 3683 3684 i915_request_submit(request); 3685 } 3686 3687 static void nop_complete_submit_request(struct i915_request *request) 3688 { 3689 unsigned long flags; 3690 3691 GEM_TRACE("%s fence %llx:%d -> -EIO\n", 3692 request->engine->name, 3693 request->fence.context, request->fence.seqno); 3694 dma_fence_set_error(&request->fence, -EIO); 3695 3696 spin_lock_irqsave(&request->engine->timeline.lock, flags); 3697 __i915_request_submit(request); 3698 intel_engine_init_global_seqno(request->engine, request->global_seqno); 3699 spin_unlock_irqrestore(&request->engine->timeline.lock, flags); 3700 } 3701 3702 void i915_gem_set_wedged(struct drm_i915_private *i915) 3703 { 3704 struct intel_engine_cs *engine; 3705 enum intel_engine_id id; 3706 3707 GEM_TRACE("start\n"); 3708 3709 if (GEM_SHOW_DEBUG()) { 3710 struct drm_printer p = drm_debug_printer(__func__); 3711 3712 for_each_engine(engine, i915, id) 3713 intel_engine_dump(engine, &p, "%s\n", engine->name); 3714 } 3715 3716 set_bit(I915_WEDGED, &i915->gpu_error.flags); 3717 smp_mb__after_atomic(); 3718 3719 /* 3720 * First, stop submission to hw, but do not yet complete requests by 3721 * rolling the global seqno forward (since this would complete requests 3722 * for which we haven't set the fence error to EIO yet). 3723 */ 3724 for_each_engine(engine, i915, id) { 3725 i915_gem_reset_prepare_engine(engine); 3726 3727 engine->submit_request = nop_submit_request; 3728 engine->schedule = NULL; 3729 } 3730 i915->caps.scheduler = 0; 3731 3732 /* Even if the GPU reset fails, it should still stop the engines */ 3733 intel_gpu_reset(i915, ALL_ENGINES); 3734 3735 /* 3736 * Make sure no one is running the old callback before we proceed with 3737 * cancelling requests and resetting the completion tracking. Otherwise 3738 * we might submit a request to the hardware which never completes. 3739 */ 3740 synchronize_rcu(); 3741 3742 for_each_engine(engine, i915, id) { 3743 /* Mark all executing requests as skipped */ 3744 engine->cancel_requests(engine); 3745 3746 /* 3747 * Only once we've force-cancelled all in-flight requests can we 3748 * start to complete all requests. 3749 */ 3750 engine->submit_request = nop_complete_submit_request; 3751 } 3752 3753 /* 3754 * Make sure no request can slip through without getting completed by 3755 * either this call here to intel_engine_init_global_seqno, or the one 3756 * in nop_complete_submit_request. 
3757 */ 3758 synchronize_rcu(); 3759 3760 for_each_engine(engine, i915, id) { 3761 unsigned long flags; 3762 3763 /* 3764 * Mark all pending requests as complete so that any concurrent 3765 * (lockless) lookup doesn't try and wait upon the request as we 3766 * reset it. 3767 */ 3768 spin_lock_irqsave(&engine->timeline.lock, flags); 3769 intel_engine_init_global_seqno(engine, 3770 intel_engine_last_submit(engine)); 3771 spin_unlock_irqrestore(&engine->timeline.lock, flags); 3772 3773 i915_gem_reset_finish_engine(engine); 3774 } 3775 3776 GEM_TRACE("end\n"); 3777 3778 wake_up_all(&i915->gpu_error.reset_queue); 3779 } 3780 3781 bool i915_gem_unset_wedged(struct drm_i915_private *i915) 3782 { 3783 struct i915_timeline *tl; 3784 3785 lockdep_assert_held(&i915->drm.struct_mutex); 3786 if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) 3787 return true; 3788 3789 GEM_TRACE("start\n"); 3790 3791 /* 3792 * Before unwedging, make sure that all pending operations 3793 * are flushed and errored out - we may have requests waiting upon 3794 * third party fences. We marked all inflight requests as EIO, and 3795 * every execbuf since returned EIO, for consistency we want all 3796 * the currently pending requests to also be marked as EIO, which 3797 * is done inside our nop_submit_request - and so we must wait. 3798 * 3799 * No more can be submitted until we reset the wedged bit. 3800 */ 3801 list_for_each_entry(tl, &i915->gt.timelines, link) { 3802 struct i915_request *rq; 3803 3804 rq = i915_gem_active_peek(&tl->last_request, 3805 &i915->drm.struct_mutex); 3806 if (!rq) 3807 continue; 3808 3809 /* 3810 * We can't use our normal waiter as we want to 3811 * avoid recursively trying to handle the current 3812 * reset. The basic dma_fence_default_wait() installs 3813 * a callback for dma_fence_signal(), which is 3814 * triggered by our nop handler (indirectly, the 3815 * callback enables the signaler thread which is 3816 * woken by the nop_submit_request() advancing the seqno 3817 * and when the seqno passes the fence, the signaler 3818 * then signals the fence waking us up). 3819 */ 3820 if (dma_fence_default_wait(&rq->fence, true, 3821 MAX_SCHEDULE_TIMEOUT) < 0) 3822 return false; 3823 } 3824 i915_retire_requests(i915); 3825 GEM_BUG_ON(i915->gt.active_requests); 3826 3827 /* 3828 * Undo nop_submit_request. We prevent all new i915 requests from 3829 * being queued (by disallowing execbuf whilst wedged) so having 3830 * waited for all active requests above, we know the system is idle 3831 * and do not have to worry about a thread being inside 3832 * engine->submit_request() as we swap over. So unlike installing 3833 * the nop_submit_request on reset, we can do this from normal 3834 * context and do not require stop_machine(). 3835 */ 3836 intel_engines_reset_default_submission(i915); 3837 i915_gem_contexts_lost(i915); 3838 3839 GEM_TRACE("end\n"); 3840 3841 smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ 3842 clear_bit(I915_WEDGED, &i915->gpu_error.flags); 3843 3844 return true; 3845 } 3846 3847 static void 3848 i915_gem_retire_work_handler(struct work_struct *work) 3849 { 3850 struct drm_i915_private *dev_priv = 3851 container_of(work, typeof(*dev_priv), gt.retire_work.work); 3852 struct drm_device *dev = &dev_priv->drm; 3853 3854 /* Come back later if the device is busy... */ 3855 if (mutex_trylock(&dev->struct_mutex)) { 3856 i915_retire_requests(dev_priv); 3857 mutex_unlock(&dev->struct_mutex); 3858 } 3859 3860 /* 3861 * Keep the retire handler running until we are finally idle. 
3862 * We do not need to do this test under locking as in the worst-case 3863 * we queue the retire worker once too often. 3864 */ 3865 if (READ_ONCE(dev_priv->gt.awake)) 3866 queue_delayed_work(dev_priv->wq, 3867 &dev_priv->gt.retire_work, 3868 round_jiffies_up_relative(HZ)); 3869 } 3870 3871 static void shrink_caches(struct drm_i915_private *i915) 3872 { 3873 #ifdef __linux__ 3874 /* 3875 * kmem_cache_shrink() discards empty slabs and reorders partially 3876 * filled slabs to prioritise allocating from the mostly full slabs, 3877 * with the aim of reducing fragmentation. 3878 */ 3879 kmem_cache_shrink(i915->priorities); 3880 kmem_cache_shrink(i915->dependencies); 3881 kmem_cache_shrink(i915->requests); 3882 kmem_cache_shrink(i915->luts); 3883 kmem_cache_shrink(i915->vmas); 3884 kmem_cache_shrink(i915->objects); 3885 #endif 3886 } 3887 3888 struct sleep_rcu_work { 3889 union { 3890 struct rcu_head rcu; 3891 struct work_struct work; 3892 }; 3893 struct drm_i915_private *i915; 3894 unsigned int epoch; 3895 }; 3896 3897 static inline bool 3898 same_epoch(struct drm_i915_private *i915, unsigned int epoch) 3899 { 3900 /* 3901 * There is a small chance that the epoch wrapped since we started 3902 * sleeping. If we assume that epoch is at least a u32, then it will 3903 * take at least 2^32 * 100ms for it to wrap, or about 326 years. 3904 */ 3905 return epoch == READ_ONCE(i915->gt.epoch); 3906 } 3907 3908 static void __sleep_work(struct work_struct *work) 3909 { 3910 struct sleep_rcu_work *s = container_of(work, typeof(*s), work); 3911 struct drm_i915_private *i915 = s->i915; 3912 unsigned int epoch = s->epoch; 3913 3914 kfree(s); 3915 if (same_epoch(i915, epoch)) 3916 shrink_caches(i915); 3917 } 3918 3919 static void __sleep_rcu(struct rcu_head *rcu) 3920 { 3921 struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); 3922 struct drm_i915_private *i915 = s->i915; 3923 3924 if (same_epoch(i915, s->epoch)) { 3925 INIT_WORK(&s->work, __sleep_work); 3926 queue_work(i915->wq, &s->work); 3927 } else { 3928 kfree(s); 3929 } 3930 } 3931 3932 static inline bool 3933 new_requests_since_last_retire(const struct drm_i915_private *i915) 3934 { 3935 return (READ_ONCE(i915->gt.active_requests) || 3936 work_pending(&i915->gt.idle_work.work)); 3937 } 3938 3939 static void assert_kernel_context_is_current(struct drm_i915_private *i915) 3940 { 3941 struct intel_engine_cs *engine; 3942 enum intel_engine_id id; 3943 3944 if (i915_terminally_wedged(&i915->gpu_error)) 3945 return; 3946 3947 GEM_BUG_ON(i915->gt.active_requests); 3948 for_each_engine(engine, i915, id) { 3949 GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); 3950 GEM_BUG_ON(engine->last_retired_context != 3951 to_intel_context(i915->kernel_context, engine)); 3952 } 3953 } 3954 3955 static void 3956 i915_gem_idle_work_handler(struct work_struct *work) 3957 { 3958 struct drm_i915_private *dev_priv = 3959 container_of(work, typeof(*dev_priv), gt.idle_work.work); 3960 unsigned int epoch = I915_EPOCH_INVALID; 3961 bool rearm_hangcheck; 3962 3963 if (!READ_ONCE(dev_priv->gt.awake)) 3964 return; 3965 3966 if (READ_ONCE(dev_priv->gt.active_requests)) 3967 return; 3968 3969 /* 3970 * Flush out the last user context, leaving only the pinned 3971 * kernel context resident. When we are idling on the kernel_context, 3972 * no more new requests (with a context switch) are emitted and we 3973 * can finally rest. 
A consequence is that the idle work handler is 3974 * always called at least twice before idling (and if the system is 3975 * idle that implies a round trip through the retire worker). 3976 */ 3977 mutex_lock(&dev_priv->drm.struct_mutex); 3978 i915_gem_switch_to_kernel_context(dev_priv); 3979 mutex_unlock(&dev_priv->drm.struct_mutex); 3980 3981 GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n", 3982 READ_ONCE(dev_priv->gt.active_requests)); 3983 3984 /* 3985 * Wait for the last execlists context to complete, but bail out in case a 3986 * new request is submitted. As we don't trust the hardware, we 3987 * continue on if the wait times out. This is necessary to allow 3988 * the machine to suspend even if the hardware dies, and we will 3989 * try to recover in resume (after depriving the hardware of power, 3990 * it may be in a better mood). 3991 */ 3992 __wait_for(if (new_requests_since_last_retire(dev_priv)) return, 3993 intel_engines_are_idle(dev_priv), 3994 I915_IDLE_ENGINES_TIMEOUT * 1000, 3995 10, 500); 3996 3997 rearm_hangcheck = 3998 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); 3999 4000 if (!mutex_trylock(&dev_priv->drm.struct_mutex)) { 4001 /* Currently busy, come back later */ 4002 mod_delayed_work(dev_priv->wq, 4003 &dev_priv->gt.idle_work, 4004 msecs_to_jiffies(50)); 4005 goto out_rearm; 4006 } 4007 4008 /* 4009 * New request retired after this work handler started, extend active 4010 * period until next instance of the work. 4011 */ 4012 if (new_requests_since_last_retire(dev_priv)) 4013 goto out_unlock; 4014 4015 epoch = __i915_gem_park(dev_priv); 4016 4017 assert_kernel_context_is_current(dev_priv); 4018 4019 rearm_hangcheck = false; 4020 out_unlock: 4021 mutex_unlock(&dev_priv->drm.struct_mutex); 4022 4023 out_rearm: 4024 if (rearm_hangcheck) { 4025 GEM_BUG_ON(!dev_priv->gt.awake); 4026 i915_queue_hangcheck(dev_priv); 4027 } 4028 4029 /* 4030 * When we are idle, it is an opportune time to reap our caches. 4031 * However, we have many objects that utilise RCU and the ordered 4032 * i915->wq that this work is executing on. To try and flush any 4033 * pending frees now we are idle, we first wait for an RCU grace 4034 * period, and then queue a task (that will run last on the wq) to 4035 * shrink and re-optimize the caches. 4036 */ 4037 if (same_epoch(dev_priv, epoch)) { 4038 struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); 4039 if (s) { 4040 s->i915 = dev_priv; 4041 s->epoch = epoch; 4042 call_rcu(&s->rcu, __sleep_rcu); 4043 } 4044 } 4045 } 4046 4047 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) 4048 { 4049 struct drm_i915_private *i915 = to_i915(gem->dev); 4050 struct drm_i915_gem_object *obj = to_intel_bo(gem); 4051 struct drm_i915_file_private *fpriv = file->driver_priv; 4052 struct i915_lut_handle *lut, *ln; 4053 4054 mutex_lock(&i915->drm.struct_mutex); 4055 4056 list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { 4057 struct i915_gem_context *ctx = lut->ctx; 4058 struct i915_vma *vma; 4059 4060 GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF)); 4061 if (ctx->file_priv != fpriv) 4062 continue; 4063 4064 vma = radix_tree_delete(&ctx->handles_vma, lut->handle); 4065 GEM_BUG_ON(vma->obj != obj); 4066 4067 /* We allow the process to have multiple handles to the same 4068 * vma, in the same fd namespace, by virtue of flink/open.
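 * Each handle holds a count in vma->open_count; the vma itself is only
 * closed below once that count drops to zero, and even then only for
 * non-global bindings (GGTT vmas are left untouched here).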
4069 */ 4070 GEM_BUG_ON(!vma->open_count); 4071 if (!--vma->open_count && !i915_vma_is_ggtt(vma)) 4072 i915_vma_close(vma); 4073 4074 list_del(&lut->obj_link); 4075 list_del(&lut->ctx_link); 4076 4077 #ifdef __linux__ 4078 kmem_cache_free(i915->luts, lut); 4079 #else 4080 pool_put(&i915->luts, lut); 4081 #endif 4082 __i915_gem_object_release_unless_active(obj); 4083 } 4084 4085 mutex_unlock(&i915->drm.struct_mutex); 4086 } 4087 4088 static unsigned long to_wait_timeout(s64 timeout_ns) 4089 { 4090 if (timeout_ns < 0) 4091 return MAX_SCHEDULE_TIMEOUT; 4092 4093 if (timeout_ns == 0) 4094 return 0; 4095 4096 return nsecs_to_jiffies_timeout(timeout_ns); 4097 } 4098 4099 /** 4100 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT 4101 * @dev: drm device pointer 4102 * @data: ioctl data blob 4103 * @file: drm file pointer 4104 * 4105 * Returns 0 if successful, else an error is returned with the remaining time in 4106 * the timeout parameter. 4107 * -ETIME: object is still busy after timeout 4108 * -ERESTARTSYS: signal interrupted the wait 4109 * -ENOENT: object doesn't exist 4110 * Also possible, but rare: 4111 * -EAGAIN: incomplete, restart syscall 4112 * -ENOMEM: damn 4113 * -ENODEV: Internal IRQ fail 4114 * -E?: The add request failed 4115 * 4116 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any 4117 * non-zero timeout parameter the wait ioctl will wait for the given number of 4118 * nanoseconds on an object becoming unbusy. Since the wait itself does so 4119 * without holding struct_mutex the object may become re-busied before this 4120 * function completes. A similar but shorter race condition exists in the busy 4121 * ioctl. 4122 */ 4123 int 4124 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 4125 { 4126 struct drm_i915_gem_wait *args = data; 4127 struct drm_i915_gem_object *obj; 4128 ktime_t start; 4129 long ret; 4130 4131 if (args->flags != 0) 4132 return -EINVAL; 4133 4134 obj = i915_gem_object_lookup(file, args->bo_handle); 4135 if (!obj) 4136 return -ENOENT; 4137 4138 start = ktime_get(); 4139 4140 ret = i915_gem_object_wait(obj, 4141 I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, 4142 to_wait_timeout(args->timeout_ns), 4143 to_rps_client(file)); 4144 4145 if (args->timeout_ns > 0) { 4146 args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); 4147 if (args->timeout_ns < 0) 4148 args->timeout_ns = 0; 4149 4150 /* 4151 * Apparently ktime isn't accurate enough and occasionally has a 4152 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch 4153 * things up to make the test happy. We allow up to 1 jiffy. 4154 * 4155 * This is a regression from the timespec->ktime conversion. 4156 */ 4157 if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) 4158 args->timeout_ns = 0; 4159 4160 /* Asked to wait beyond the jiffie/scheduler precision? */ 4161 if (ret == -ETIME && args->timeout_ns) 4162 ret = -EAGAIN; 4163 } 4164 4165 i915_gem_object_put(obj); 4166 return ret; 4167 } 4168 4169 static long wait_for_timeline(struct i915_timeline *tl, 4170 unsigned int flags, long timeout) 4171 { 4172 struct i915_request *rq; 4173 4174 rq = i915_gem_active_get_unlocked(&tl->last_request); 4175 if (!rq) 4176 return timeout; 4177 4178 /* 4179 * "Race-to-idle". 4180 * 4181 * Switching to the kernel context is often used as a synchronous 4182 * step prior to idling, e.g. in suspend for flushing all 4183 * current operations to memory before sleeping.
These we 4184 * want to complete as quickly as possible to avoid prolonged 4185 * stalls, so allow the gpu to boost to maximum clocks. 4186 */ 4187 if (flags & I915_WAIT_FOR_IDLE_BOOST) 4188 gen6_rps_boost(rq, NULL); 4189 4190 timeout = i915_request_wait(rq, flags, timeout); 4191 i915_request_put(rq); 4192 4193 return timeout; 4194 } 4195 4196 static int wait_for_engines(struct drm_i915_private *i915) 4197 { 4198 if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { 4199 dev_err(i915->drm.dev, 4200 "Failed to idle engines, declaring wedged!\n"); 4201 GEM_TRACE_DUMP(); 4202 i915_gem_set_wedged(i915); 4203 return -EIO; 4204 } 4205 4206 return 0; 4207 } 4208 4209 int i915_gem_wait_for_idle(struct drm_i915_private *i915, 4210 unsigned int flags, long timeout) 4211 { 4212 GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", 4213 flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", 4214 timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); 4215 4216 /* If the device is asleep, we have no requests outstanding */ 4217 if (!READ_ONCE(i915->gt.awake)) 4218 return 0; 4219 4220 if (flags & I915_WAIT_LOCKED) { 4221 struct i915_timeline *tl; 4222 int err; 4223 4224 lockdep_assert_held(&i915->drm.struct_mutex); 4225 4226 list_for_each_entry(tl, &i915->gt.timelines, link) { 4227 timeout = wait_for_timeline(tl, flags, timeout); 4228 if (timeout < 0) 4229 return timeout; 4230 } 4231 4232 err = wait_for_engines(i915); 4233 if (err) 4234 return err; 4235 4236 i915_retire_requests(i915); 4237 GEM_BUG_ON(i915->gt.active_requests); 4238 } else { 4239 struct intel_engine_cs *engine; 4240 enum intel_engine_id id; 4241 4242 for_each_engine(engine, i915, id) { 4243 struct i915_timeline *tl = &engine->timeline; 4244 4245 timeout = wait_for_timeline(tl, flags, timeout); 4246 if (timeout < 0) 4247 return timeout; 4248 } 4249 } 4250 4251 return 0; 4252 } 4253 4254 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) 4255 { 4256 /* 4257 * We manually flush the CPU domain so that we can override and 4258 * force the flush for the display, and perform it asyncrhonously. 4259 */ 4260 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4261 if (obj->cache_dirty) 4262 i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); 4263 obj->write_domain = 0; 4264 } 4265 4266 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) 4267 { 4268 if (!READ_ONCE(obj->pin_global)) 4269 return; 4270 4271 mutex_lock(&obj->base.dev->struct_mutex); 4272 __i915_gem_object_flush_for_display(obj); 4273 mutex_unlock(&obj->base.dev->struct_mutex); 4274 } 4275 4276 /** 4277 * Moves a single object to the WC read, and possibly write domain. 4278 * @obj: object to act on 4279 * @write: ask for write access or read only 4280 * 4281 * This function returns when the move is complete, including waiting on 4282 * flushes to occur. 4283 */ 4284 int 4285 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write) 4286 { 4287 int ret; 4288 4289 lockdep_assert_held(&obj->base.dev->struct_mutex); 4290 4291 ret = i915_gem_object_wait(obj, 4292 I915_WAIT_INTERRUPTIBLE | 4293 I915_WAIT_LOCKED | 4294 (write ? I915_WAIT_ALL : 0), 4295 MAX_SCHEDULE_TIMEOUT, 4296 NULL); 4297 if (ret) 4298 return ret; 4299 4300 if (obj->write_domain == I915_GEM_DOMAIN_WC) 4301 return 0; 4302 4303 /* Flush and acquire obj->pages so that we are coherent through 4304 * direct access in memory with previous cached writes through 4305 * shmemfs and that our cache domain tracking remains valid. 
4306 * For example, if the obj->filp was moved to swap without us 4307 * being notified and releasing the pages, we would mistakenly 4308 * continue to assume that the obj remained out of the CPU cached 4309 * domain. 4310 */ 4311 ret = i915_gem_object_pin_pages(obj); 4312 if (ret) 4313 return ret; 4314 4315 flush_write_domain(obj, ~I915_GEM_DOMAIN_WC); 4316 4317 /* Serialise direct access to this object with the barriers for 4318 * coherent writes from the GPU, by effectively invalidating the 4319 * WC domain upon first access. 4320 */ 4321 if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0) 4322 mb(); 4323 4324 /* It should now be out of any other write domains, and we can update 4325 * the domain values for our changes. 4326 */ 4327 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0); 4328 obj->read_domains |= I915_GEM_DOMAIN_WC; 4329 if (write) { 4330 obj->read_domains = I915_GEM_DOMAIN_WC; 4331 obj->write_domain = I915_GEM_DOMAIN_WC; 4332 obj->mm.dirty = true; 4333 } 4334 4335 i915_gem_object_unpin_pages(obj); 4336 return 0; 4337 } 4338 4339 /** 4340 * Moves a single object to the GTT read, and possibly write domain. 4341 * @obj: object to act on 4342 * @write: ask for write access or read only 4343 * 4344 * This function returns when the move is complete, including waiting on 4345 * flushes to occur. 4346 */ 4347 int 4348 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 4349 { 4350 int ret; 4351 4352 lockdep_assert_held(&obj->base.dev->struct_mutex); 4353 4354 ret = i915_gem_object_wait(obj, 4355 I915_WAIT_INTERRUPTIBLE | 4356 I915_WAIT_LOCKED | 4357 (write ? I915_WAIT_ALL : 0), 4358 MAX_SCHEDULE_TIMEOUT, 4359 NULL); 4360 if (ret) 4361 return ret; 4362 4363 if (obj->write_domain == I915_GEM_DOMAIN_GTT) 4364 return 0; 4365 4366 /* Flush and acquire obj->pages so that we are coherent through 4367 * direct access in memory with previous cached writes through 4368 * shmemfs and that our cache domain tracking remains valid. 4369 * For example, if the obj->filp was moved to swap without us 4370 * being notified and releasing the pages, we would mistakenly 4371 * continue to assume that the obj remained out of the CPU cached 4372 * domain. 4373 */ 4374 ret = i915_gem_object_pin_pages(obj); 4375 if (ret) 4376 return ret; 4377 4378 flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT); 4379 4380 /* Serialise direct access to this object with the barriers for 4381 * coherent writes from the GPU, by effectively invalidating the 4382 * GTT domain upon first access. 4383 */ 4384 if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0) 4385 mb(); 4386 4387 /* It should now be out of any other write domains, and we can update 4388 * the domain values for our changes. 4389 */ 4390 GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0); 4391 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4392 if (write) { 4393 obj->read_domains = I915_GEM_DOMAIN_GTT; 4394 obj->write_domain = I915_GEM_DOMAIN_GTT; 4395 obj->mm.dirty = true; 4396 } 4397 4398 i915_gem_object_unpin_pages(obj); 4399 return 0; 4400 } 4401 4402 /** 4403 * Changes the cache-level of an object across all VMA. 4404 * @obj: object to act on 4405 * @cache_level: new cache level to set for the object 4406 * 4407 * After this function returns, the object will be in the new cache-level 4408 * across all GTT and the contents of the backing storage will be coherent, 4409 * with respect to the new cache-level. 
In order to keep the backing storage 4410 * coherent for all users, we only allow a single cache level to be set 4411 * globally on the object and prevent it from being changed whilst the 4412 * hardware is reading from the object. That is if the object is currently 4413 * on the scanout it will be set to uncached (or equivalent display 4414 * cache coherency) and all non-MOCS GPU access will also be uncached so 4415 * that all direct access to the scanout remains coherent. 4416 */ 4417 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, 4418 enum i915_cache_level cache_level) 4419 { 4420 struct i915_vma *vma; 4421 int ret; 4422 4423 lockdep_assert_held(&obj->base.dev->struct_mutex); 4424 4425 if (obj->cache_level == cache_level) 4426 return 0; 4427 4428 /* Inspect the list of currently bound VMA and unbind any that would 4429 * be invalid given the new cache-level. This is principally to 4430 * catch the issue of the CS prefetch crossing page boundaries and 4431 * reading an invalid PTE on older architectures. 4432 */ 4433 restart: 4434 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4435 if (!drm_mm_node_allocated(&vma->node)) 4436 continue; 4437 4438 if (i915_vma_is_pinned(vma)) { 4439 DRM_DEBUG("can not change the cache level of pinned objects\n"); 4440 return -EBUSY; 4441 } 4442 4443 if (!i915_vma_is_closed(vma) && 4444 i915_gem_valid_gtt_space(vma, cache_level)) 4445 continue; 4446 4447 ret = i915_vma_unbind(vma); 4448 if (ret) 4449 return ret; 4450 4451 /* As unbinding may affect other elements in the 4452 * obj->vma_list (due to side-effects from retiring 4453 * an active vma), play safe and restart the iterator. 4454 */ 4455 goto restart; 4456 } 4457 4458 /* We can reuse the existing drm_mm nodes but need to change the 4459 * cache-level on the PTE. We could simply unbind them all and 4460 * rebind with the correct cache-level on next use. However since 4461 * we already have a valid slot, dma mapping, pages etc, we may as 4462 * rewrite the PTE in the belief that doing so tramples upon less 4463 * state and so involves less work. 4464 */ 4465 if (obj->bind_count) { 4466 /* Before we change the PTE, the GPU must not be accessing it. 4467 * If we wait upon the object, we know that all the bound 4468 * VMA are no longer active. 4469 */ 4470 ret = i915_gem_object_wait(obj, 4471 I915_WAIT_INTERRUPTIBLE | 4472 I915_WAIT_LOCKED | 4473 I915_WAIT_ALL, 4474 MAX_SCHEDULE_TIMEOUT, 4475 NULL); 4476 if (ret) 4477 return ret; 4478 4479 if (!HAS_LLC(to_i915(obj->base.dev)) && 4480 cache_level != I915_CACHE_NONE) { 4481 /* Access to snoopable pages through the GTT is 4482 * incoherent and on some machines causes a hard 4483 * lockup. Relinquish the CPU mmaping to force 4484 * userspace to refault in the pages and we can 4485 * then double check if the GTT mapping is still 4486 * valid for that pointer access. 4487 */ 4488 i915_gem_release_mmap(obj); 4489 4490 /* As we no longer need a fence for GTT access, 4491 * we can relinquish it now (and so prevent having 4492 * to steal a fence from someone else on the next 4493 * fence request). Note GPU activity would have 4494 * dropped the fence as all snoopable access is 4495 * supposed to be linear. 4496 */ 4497 for_each_ggtt_vma(vma, obj) { 4498 ret = i915_vma_put_fence(vma); 4499 if (ret) 4500 return ret; 4501 } 4502 } else { 4503 /* We either have incoherent backing store and 4504 * so no GTT access or the architecture is fully 4505 * coherent. 
In such cases, existing GTT mmaps 4506 * ignore the cache bit in the PTE and we can 4507 * rewrite it without confusing the GPU or having 4508 * to force userspace to fault back in its mmaps. 4509 */ 4510 } 4511 4512 list_for_each_entry(vma, &obj->vma_list, obj_link) { 4513 if (!drm_mm_node_allocated(&vma->node)) 4514 continue; 4515 4516 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); 4517 if (ret) 4518 return ret; 4519 } 4520 } 4521 4522 list_for_each_entry(vma, &obj->vma_list, obj_link) 4523 vma->node.color = cache_level; 4524 i915_gem_object_set_cache_coherency(obj, cache_level); 4525 obj->cache_dirty = true; /* Always invalidate stale cachelines */ 4526 4527 return 0; 4528 } 4529 4530 int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 4531 struct drm_file *file) 4532 { 4533 struct drm_i915_gem_caching *args = data; 4534 struct drm_i915_gem_object *obj; 4535 int err = 0; 4536 4537 rcu_read_lock(); 4538 obj = i915_gem_object_lookup_rcu(file, args->handle); 4539 if (!obj) { 4540 err = -ENOENT; 4541 goto out; 4542 } 4543 4544 switch (obj->cache_level) { 4545 case I915_CACHE_LLC: 4546 case I915_CACHE_L3_LLC: 4547 args->caching = I915_CACHING_CACHED; 4548 break; 4549 4550 case I915_CACHE_WT: 4551 args->caching = I915_CACHING_DISPLAY; 4552 break; 4553 4554 default: 4555 args->caching = I915_CACHING_NONE; 4556 break; 4557 } 4558 out: 4559 rcu_read_unlock(); 4560 return err; 4561 } 4562 4563 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, 4564 struct drm_file *file) 4565 { 4566 struct drm_i915_private *i915 = to_i915(dev); 4567 struct drm_i915_gem_caching *args = data; 4568 struct drm_i915_gem_object *obj; 4569 enum i915_cache_level level; 4570 int ret = 0; 4571 4572 switch (args->caching) { 4573 case I915_CACHING_NONE: 4574 level = I915_CACHE_NONE; 4575 break; 4576 case I915_CACHING_CACHED: 4577 /* 4578 * Due to a HW issue on BXT A stepping, GPU stores via a 4579 * snooped mapping may leave stale data in a corresponding CPU 4580 * cacheline, whereas normally such cachelines would get 4581 * invalidated. 4582 */ 4583 if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) 4584 return -ENODEV; 4585 4586 level = I915_CACHE_LLC; 4587 break; 4588 case I915_CACHING_DISPLAY: 4589 level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; 4590 break; 4591 default: 4592 return -EINVAL; 4593 } 4594 4595 obj = i915_gem_object_lookup(file, args->handle); 4596 if (!obj) 4597 return -ENOENT; 4598 4599 /* 4600 * The caching mode of proxy object is handled by its generator, and 4601 * not allowed to be changed by userspace. 4602 */ 4603 if (i915_gem_object_is_proxy(obj)) { 4604 ret = -ENXIO; 4605 goto out; 4606 } 4607 4608 if (obj->cache_level == level) 4609 goto out; 4610 4611 ret = i915_gem_object_wait(obj, 4612 I915_WAIT_INTERRUPTIBLE, 4613 MAX_SCHEDULE_TIMEOUT, 4614 to_rps_client(file)); 4615 if (ret) 4616 goto out; 4617 4618 ret = i915_mutex_lock_interruptible(dev); 4619 if (ret) 4620 goto out; 4621 4622 ret = i915_gem_object_set_cache_level(obj, level); 4623 mutex_unlock(&dev->struct_mutex); 4624 4625 out: 4626 i915_gem_object_put(obj); 4627 return ret; 4628 } 4629 4630 /* 4631 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from 4632 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined 4633 * (for pageflips). We only flush the caches while preparing the buffer for 4634 * display, the callers are responsible for frontbuffer flush. 
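 *
 * A rough usage sketch (error handling elided; the caller must hold
 * struct_mutex; names such as align, view and flags stand for whatever
 * the caller computed, and the scanout step is purely illustrative):
 *
 *	vma = i915_gem_object_pin_to_display_plane(obj, align, &view, flags);
 *	if (!IS_ERR(vma)) {
 *		... program the plane to scan out from i915_ggtt_offset(vma) ...
 *		i915_gem_object_unpin_from_display_plane(vma);
 *	}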
4635 */ 4636 struct i915_vma * 4637 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 4638 u32 alignment, 4639 const struct i915_ggtt_view *view, 4640 unsigned int flags) 4641 { 4642 struct i915_vma *vma; 4643 int ret; 4644 4645 lockdep_assert_held(&obj->base.dev->struct_mutex); 4646 4647 /* Mark the global pin early so that we account for the 4648 * display coherency whilst setting up the cache domains. 4649 */ 4650 obj->pin_global++; 4651 4652 /* The display engine is not coherent with the LLC cache on gen6. As 4653 * a result, we make sure that the pinning that is about to occur is 4654 * done with uncached PTEs. This is lowest common denominator for all 4655 * chipsets. 4656 * 4657 * However for gen6+, we could do better by using the GFDT bit instead 4658 * of uncaching, which would allow us to flush all the LLC-cached data 4659 * with that bit in the PTE to main memory with just one PIPE_CONTROL. 4660 */ 4661 ret = i915_gem_object_set_cache_level(obj, 4662 HAS_WT(to_i915(obj->base.dev)) ? 4663 I915_CACHE_WT : I915_CACHE_NONE); 4664 if (ret) { 4665 vma = ERR_PTR(ret); 4666 goto err_unpin_global; 4667 } 4668 4669 /* As the user may map the buffer once pinned in the display plane 4670 * (e.g. libkms for the bootup splash), we have to ensure that we 4671 * always use map_and_fenceable for all scanout buffers. However, 4672 * it may simply be too big to fit into mappable, in which case 4673 * put it anyway and hope that userspace can cope (but always first 4674 * try to preserve the existing ABI). 4675 */ 4676 vma = ERR_PTR(-ENOSPC); 4677 if ((flags & PIN_MAPPABLE) == 0 && 4678 (!view || view->type == I915_GGTT_VIEW_NORMAL)) 4679 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 4680 flags | 4681 PIN_MAPPABLE | 4682 PIN_NONBLOCK); 4683 if (IS_ERR(vma)) 4684 vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); 4685 if (IS_ERR(vma)) 4686 goto err_unpin_global; 4687 4688 vma->display_alignment = max_t(u64, vma->display_alignment, alignment); 4689 4690 __i915_gem_object_flush_for_display(obj); 4691 4692 /* It should now be out of any other write domains, and we can update 4693 * the domain values for our changes. 4694 */ 4695 obj->read_domains |= I915_GEM_DOMAIN_GTT; 4696 4697 return vma; 4698 4699 err_unpin_global: 4700 obj->pin_global--; 4701 return vma; 4702 } 4703 4704 void 4705 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) 4706 { 4707 lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); 4708 4709 if (WARN_ON(vma->obj->pin_global == 0)) 4710 return; 4711 4712 if (--vma->obj->pin_global == 0) 4713 vma->display_alignment = I915_GTT_MIN_ALIGNMENT; 4714 4715 /* Bump the LRU to try and avoid premature eviction whilst flipping */ 4716 i915_gem_object_bump_inactive_ggtt(vma->obj); 4717 4718 i915_vma_unpin(vma); 4719 } 4720 4721 /** 4722 * Moves a single object to the CPU read, and possibly write domain. 4723 * @obj: object to act on 4724 * @write: requesting write or read-only access 4725 * 4726 * This function returns when the move is complete, including waiting on 4727 * flushes to occur. 4728 */ 4729 int 4730 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 4731 { 4732 int ret; 4733 4734 lockdep_assert_held(&obj->base.dev->struct_mutex); 4735 4736 ret = i915_gem_object_wait(obj, 4737 I915_WAIT_INTERRUPTIBLE | 4738 I915_WAIT_LOCKED | 4739 (write ? 
I915_WAIT_ALL : 0), 4740 MAX_SCHEDULE_TIMEOUT, 4741 NULL); 4742 if (ret) 4743 return ret; 4744 4745 flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU); 4746 4747 /* Flush the CPU cache if it's still invalid. */ 4748 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { 4749 i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); 4750 obj->read_domains |= I915_GEM_DOMAIN_CPU; 4751 } 4752 4753 /* It should now be out of any other write domains, and we can update 4754 * the domain values for our changes. 4755 */ 4756 GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU); 4757 4758 /* If we're writing through the CPU, then the GPU read domains will 4759 * need to be invalidated at next use. 4760 */ 4761 if (write) 4762 __start_cpu_write(obj); 4763 4764 return 0; 4765 } 4766 4767 /* Throttle our rendering by waiting until the ring has completed our requests 4768 * emitted over 20 msec ago. 4769 * 4770 * Note that if we were to use the current jiffies each time around the loop, 4771 * we wouldn't escape the function with any frames outstanding if the time to 4772 * render a frame was over 20ms. 4773 * 4774 * This should get us reasonable parallelism between CPU and GPU but also 4775 * relatively low latency when blocking on a particular request to finish. 4776 */ 4777 static int 4778 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) 4779 { 4780 struct drm_i915_private *dev_priv = to_i915(dev); 4781 struct drm_i915_file_private *file_priv = file->driver_priv; 4782 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; 4783 struct i915_request *request, *target = NULL; 4784 long ret; 4785 4786 /* ABI: return -EIO if already wedged */ 4787 if (i915_terminally_wedged(&dev_priv->gpu_error)) 4788 return -EIO; 4789 4790 spin_lock(&file_priv->mm.lock); 4791 list_for_each_entry(request, &file_priv->mm.request_list, client_link) { 4792 if (time_after_eq(request->emitted_jiffies, recent_enough)) 4793 break; 4794 4795 if (target) { 4796 list_del(&target->client_link); 4797 target->file_priv = NULL; 4798 } 4799 4800 target = request; 4801 } 4802 if (target) 4803 i915_request_get(target); 4804 spin_unlock(&file_priv->mm.lock); 4805 4806 if (target == NULL) 4807 return 0; 4808 4809 ret = i915_request_wait(target, 4810 I915_WAIT_INTERRUPTIBLE, 4811 MAX_SCHEDULE_TIMEOUT); 4812 i915_request_put(target); 4813 4814 return ret < 0 ? ret : 0; 4815 } 4816 4817 struct i915_vma * 4818 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, 4819 const struct i915_ggtt_view *view, 4820 u64 size, 4821 u64 alignment, 4822 u64 flags) 4823 { 4824 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 4825 struct i915_address_space *vm = &dev_priv->ggtt.vm; 4826 struct i915_vma *vma; 4827 int ret; 4828 4829 lockdep_assert_held(&obj->base.dev->struct_mutex); 4830 4831 if (flags & PIN_MAPPABLE && 4832 (!view || view->type == I915_GGTT_VIEW_NORMAL)) { 4833 /* If the required space is larger than the available 4834 * aperture, we will not able to find a slot for the 4835 * object and unbinding the object now will be in 4836 * vain. Worse, doing so may cause us to ping-pong 4837 * the object in and out of the Global GTT and 4838 * waste a lot of cycles under the mutex. 4839 */ 4840 if (obj->base.size > dev_priv->ggtt.mappable_end) 4841 return ERR_PTR(-E2BIG); 4842 4843 /* If NONBLOCK is set the caller is optimistically 4844 * trying to cache the full object within the mappable 4845 * aperture, and *must* have a fallback in place for 4846 * situations where we cannot bind the object. 
We 4847 * can be a little more lax here and use the fallback 4848 * more often to avoid costly migrations of ourselves 4849 * and other objects within the aperture. 4850 * 4851 * Half-the-aperture is used as a simple heuristic. 4852 * More interesting would be to search for a free 4853 * block prior to making the commitment to unbind. 4854 * That caters for the self-harm case, and with a 4855 * little more heuristics (e.g. NOFAULT, NOEVICT) 4856 * we could try to minimise harm to others. 4857 */ 4858 if (flags & PIN_NONBLOCK && 4859 obj->base.size > dev_priv->ggtt.mappable_end / 2) 4860 return ERR_PTR(-ENOSPC); 4861 } 4862 4863 vma = i915_vma_instance(obj, vm, view); 4864 if (unlikely(IS_ERR(vma))) 4865 return vma; 4866 4867 if (i915_vma_misplaced(vma, size, alignment, flags)) { 4868 if (flags & PIN_NONBLOCK) { 4869 if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) 4870 return ERR_PTR(-ENOSPC); 4871 4872 if (flags & PIN_MAPPABLE && 4873 vma->fence_size > dev_priv->ggtt.mappable_end / 2) 4874 return ERR_PTR(-ENOSPC); 4875 } 4876 4877 WARN(i915_vma_is_pinned(vma), 4878 "bo is already pinned in ggtt with incorrect alignment:" 4879 " offset=%08x, req.alignment=%llx," 4880 " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", 4881 i915_ggtt_offset(vma), alignment, 4882 !!(flags & PIN_MAPPABLE), 4883 i915_vma_is_map_and_fenceable(vma)); 4884 ret = i915_vma_unbind(vma); 4885 if (ret) 4886 return ERR_PTR(ret); 4887 } 4888 4889 ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); 4890 if (ret) 4891 return ERR_PTR(ret); 4892 4893 return vma; 4894 } 4895 4896 static __always_inline unsigned int __busy_read_flag(unsigned int id) 4897 { 4898 /* Note that we could alias engines in the execbuf API, but 4899 * that would be very unwise as it prevents userspace from 4900 * exercising fine control over engine selection. Ahem. 4901 * 4902 * This should be something like EXEC_MAX_ENGINE instead of 4903 * I915_NUM_ENGINES. 4904 */ 4905 BUILD_BUG_ON(I915_NUM_ENGINES > 16); 4906 return 0x10000 << id; 4907 } 4908 4909 static __always_inline unsigned int __busy_write_id(unsigned int id) 4910 { 4911 /* The uABI guarantees an active writer is also amongst the read 4912 * engines. This would be true if we accessed the activity tracking 4913 * under the lock, but as we perform the lookup of the object and 4914 * its activity locklessly we cannot guarantee that the last_write 4915 * being active implies that we have set the same engine flag from 4916 * last_read - hence we always set both read and write busy for 4917 * last_write. 4918 */ 4919 return id | __busy_read_flag(id); 4920 } 4921 4922 static __always_inline unsigned int 4923 __busy_set_if_active(const struct dma_fence *fence, 4924 unsigned int (*flag)(unsigned int id)) 4925 { 4926 struct i915_request *rq; 4927 4928 /* We have to check the current hw status of the fence as the uABI 4929 * guarantees forward progress. We could rely on the idle worker 4930 * to eventually flush us, but to minimise latency just ask the 4931 * hardware. 4932 * 4933 * Note we only report on the status of native fences.
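 *
 * The value returned below is flag(rq->engine->uabi_id); as a worked
 * example, a request on the engine with uabi_id 1 reports 0x20000 via
 * __busy_read_flag() and 0x20001 via __busy_write_id(), i.e. the
 * writer's id lives in the low 16 bits and the per-engine read mask in
 * the bits above.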
4934 */ 4935 if (!dma_fence_is_i915(fence)) 4936 return 0; 4937 4938 /* opencode to_request() in order to avoid const warnings */ 4939 rq = container_of(fence, struct i915_request, fence); 4940 if (i915_request_completed(rq)) 4941 return 0; 4942 4943 return flag(rq->engine->uabi_id); 4944 } 4945 4946 static __always_inline unsigned int 4947 busy_check_reader(const struct dma_fence *fence) 4948 { 4949 return __busy_set_if_active(fence, __busy_read_flag); 4950 } 4951 4952 static __always_inline unsigned int 4953 busy_check_writer(const struct dma_fence *fence) 4954 { 4955 if (!fence) 4956 return 0; 4957 4958 return __busy_set_if_active(fence, __busy_write_id); 4959 } 4960 4961 int 4962 i915_gem_busy_ioctl(struct drm_device *dev, void *data, 4963 struct drm_file *file) 4964 { 4965 struct drm_i915_gem_busy *args = data; 4966 struct drm_i915_gem_object *obj; 4967 struct reservation_object_list *list; 4968 unsigned int seq; 4969 int err; 4970 4971 err = -ENOENT; 4972 rcu_read_lock(); 4973 obj = i915_gem_object_lookup_rcu(file, args->handle); 4974 if (!obj) 4975 goto out; 4976 4977 /* A discrepancy here is that we do not report the status of 4978 * non-i915 fences, i.e. even though we may report the object as idle, 4979 * a call to set-domain may still stall waiting for foreign rendering. 4980 * This also means that wait-ioctl may report an object as busy, 4981 * where busy-ioctl considers it idle. 4982 * 4983 * We trade the ability to warn of foreign fences to report on which 4984 * i915 engines are active for the object. 4985 * 4986 * Alternatively, we can trade that extra information on read/write 4987 * activity with 4988 * args->busy = 4989 * !reservation_object_test_signaled_rcu(obj->resv, true); 4990 * to report the overall busyness. This is what the wait-ioctl does. 
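 *
 * The mask below is gathered under a seqcount retry loop: we snapshot
 * obj->resv->seq, sample the exclusive and shared fences, and start
 * again if the reservation object changed while we observed activity,
 * so the reported set of engines is a consistent snapshot.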
4991 * 4992 */ 4993 retry: 4994 seq = raw_read_seqcount(&obj->resv->seq); 4995 4996 /* Translate the exclusive fence to the READ *and* WRITE engine */ 4997 args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); 4998 4999 /* Translate shared fences to READ set of engines */ 5000 list = rcu_dereference(obj->resv->fence); 5001 if (list) { 5002 unsigned int shared_count = list->shared_count, i; 5003 5004 for (i = 0; i < shared_count; ++i) { 5005 struct dma_fence *fence = 5006 rcu_dereference(list->shared[i]); 5007 5008 args->busy |= busy_check_reader(fence); 5009 } 5010 } 5011 5012 if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) 5013 goto retry; 5014 5015 err = 0; 5016 out: 5017 rcu_read_unlock(); 5018 return err; 5019 } 5020 5021 int 5022 i915_gem_throttle_ioctl(struct drm_device *dev, void *data, 5023 struct drm_file *file_priv) 5024 { 5025 return i915_gem_ring_throttle(dev, file_priv); 5026 } 5027 5028 int 5029 i915_gem_madvise_ioctl(struct drm_device *dev, void *data, 5030 struct drm_file *file_priv) 5031 { 5032 struct drm_i915_private *dev_priv = to_i915(dev); 5033 struct drm_i915_gem_madvise *args = data; 5034 struct drm_i915_gem_object *obj; 5035 int err; 5036 5037 switch (args->madv) { 5038 case I915_MADV_DONTNEED: 5039 case I915_MADV_WILLNEED: 5040 break; 5041 default: 5042 return -EINVAL; 5043 } 5044 5045 obj = i915_gem_object_lookup(file_priv, args->handle); 5046 if (!obj) 5047 return -ENOENT; 5048 5049 err = mutex_lock_interruptible(&obj->mm.lock); 5050 if (err) 5051 goto out; 5052 5053 if (i915_gem_object_has_pages(obj) && 5054 i915_gem_object_is_tiled(obj) && 5055 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 5056 if (obj->mm.madv == I915_MADV_WILLNEED) { 5057 GEM_BUG_ON(!obj->mm.quirked); 5058 __i915_gem_object_unpin_pages(obj); 5059 obj->mm.quirked = false; 5060 } 5061 if (args->madv == I915_MADV_WILLNEED) { 5062 GEM_BUG_ON(obj->mm.quirked); 5063 __i915_gem_object_pin_pages(obj); 5064 obj->mm.quirked = true; 5065 } 5066 } 5067 5068 if (obj->mm.madv != __I915_MADV_PURGED) 5069 obj->mm.madv = args->madv; 5070 5071 /* if the object is no longer attached, discard its backing storage */ 5072 if (obj->mm.madv == I915_MADV_DONTNEED && 5073 !i915_gem_object_has_pages(obj)) 5074 i915_gem_object_truncate(obj); 5075 5076 args->retained = obj->mm.madv != __I915_MADV_PURGED; 5077 mutex_unlock(&obj->mm.lock); 5078 5079 out: 5080 i915_gem_object_put(obj); 5081 return err; 5082 } 5083 5084 static void 5085 frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) 5086 { 5087 struct drm_i915_gem_object *obj = 5088 container_of(active, typeof(*obj), frontbuffer_write); 5089 5090 intel_fb_obj_flush(obj, ORIGIN_CS); 5091 } 5092 5093 void i915_gem_object_init(struct drm_i915_gem_object *obj, 5094 const struct drm_i915_gem_object_ops *ops) 5095 { 5096 rw_init(&obj->mm.lock, "mmlk"); 5097 5098 INIT_LIST_HEAD(&obj->vma_list); 5099 INIT_LIST_HEAD(&obj->lut_list); 5100 INIT_LIST_HEAD(&obj->batch_pool_link); 5101 5102 obj->ops = ops; 5103 5104 reservation_object_init(&obj->__builtin_resv); 5105 obj->resv = &obj->__builtin_resv; 5106 5107 obj->frontbuffer_ggtt_origin = ORIGIN_GTT; 5108 init_request_active(&obj->frontbuffer_write, frontbuffer_retire); 5109 5110 obj->mm.madv = I915_MADV_WILLNEED; 5111 INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); 5112 rw_init(&obj->mm.get_page.lock, "mmget"); 5113 5114 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 5115 } 5116 5117 static const struct drm_i915_gem_object_ops 
i915_gem_object_ops = { 5118 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | 5119 I915_GEM_OBJECT_IS_SHRINKABLE, 5120 5121 .get_pages = i915_gem_object_get_pages_gtt, 5122 .put_pages = i915_gem_object_put_pages_gtt, 5123 5124 .pwrite = i915_gem_object_pwrite_gtt, 5125 }; 5126 5127 static int i915_gem_object_create_shmem(struct drm_device *dev, 5128 struct drm_gem_object *obj, 5129 size_t size) 5130 { 5131 #ifdef __linux__ 5132 struct drm_i915_private *i915 = to_i915(dev); 5133 unsigned long flags = VM_NORESERVE; 5134 struct file *filp; 5135 5136 drm_gem_private_object_init(dev, obj, size); 5137 5138 if (i915->mm.gemfs) 5139 filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, 5140 flags); 5141 else 5142 filp = shmem_file_setup("i915", size, flags); 5143 5144 if (IS_ERR(filp)) 5145 return PTR_ERR(filp); 5146 5147 obj->filp = filp; 5148 5149 return 0; 5150 #else 5151 drm_gem_private_object_init(dev, obj, size); 5152 5153 return 0; 5154 #endif 5155 } 5156 5157 struct drm_i915_gem_object * 5158 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) 5159 { 5160 struct drm_i915_gem_object *obj; 5161 struct address_space *mapping; 5162 unsigned int cache_level; 5163 gfp_t mask; 5164 int ret; 5165 5166 /* There is a prevalence of the assumption that we fit the object's 5167 * page count inside a 32bit _signed_ variable. Let's document this and 5168 * catch if we ever need to fix it. In the meantime, if you do spot 5169 * such a local variable, please consider fixing! 5170 */ 5171 if (size >> PAGE_SHIFT > INT_MAX) 5172 return ERR_PTR(-E2BIG); 5173 5174 if (overflows_type(size, obj->base.size)) 5175 return ERR_PTR(-E2BIG); 5176 5177 obj = i915_gem_object_alloc(dev_priv); 5178 if (obj == NULL) 5179 return ERR_PTR(-ENOMEM); 5180 5181 #ifdef __linux__ 5182 ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); 5183 #else 5184 ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); 5185 #endif 5186 if (ret) 5187 goto fail; 5188 5189 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; 5190 if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) { 5191 /* 965gm cannot relocate objects above 4GiB. */ 5192 mask &= ~__GFP_HIGHMEM; 5193 mask |= __GFP_DMA32; 5194 } 5195 5196 #ifdef __linux__ 5197 mapping = obj->base.filp->f_mapping; 5198 mapping_set_gfp_mask(mapping, mask); 5199 GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM)); 5200 #endif 5201 5202 i915_gem_object_init(obj, &i915_gem_object_ops); 5203 5204 obj->write_domain = I915_GEM_DOMAIN_CPU; 5205 obj->read_domains = I915_GEM_DOMAIN_CPU; 5206 5207 if (HAS_LLC(dev_priv)) 5208 /* On some devices, we can have the GPU use the LLC (the CPU 5209 * cache) for about a 10% performance improvement 5210 * compared to uncached. Graphics requests other than 5211 * display scanout are coherent with the CPU in 5212 * accessing this cache. This means in this mode we 5213 * don't need to clflush on the CPU side, and on the 5214 * GPU side we only need to flush internal caches to 5215 * get data visible to the CPU. 5216 * 5217 * However, we maintain the display planes as UC, and so 5218 * need to rebind when first used as such. 
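 * The level chosen here merely seeds the object's cache-coherency
 * tracking; it can still be changed later, for example via the
 * set_caching ioctl or when pinning the object for the display.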
5219 */ 5220 cache_level = I915_CACHE_LLC; 5221 else 5222 cache_level = I915_CACHE_NONE; 5223 5224 i915_gem_object_set_cache_coherency(obj, cache_level); 5225 5226 trace_i915_gem_object_create(obj); 5227 5228 return obj; 5229 5230 fail: 5231 i915_gem_object_free(obj); 5232 return ERR_PTR(ret); 5233 } 5234 5235 static bool discard_backing_storage(struct drm_i915_gem_object *obj) 5236 { 5237 /* If we are the last user of the backing storage (be it shmemfs 5238 * pages or stolen etc), we know that the pages are going to be 5239 * immediately released. In this case, we can then skip copying 5240 * back the contents from the GPU. 5241 */ 5242 5243 if (obj->mm.madv != I915_MADV_WILLNEED) 5244 return false; 5245 5246 if (obj->base.filp == NULL) 5247 return true; 5248 5249 /* At first glance, this looks racy, but then again so would be 5250 * userspace racing mmap against close. However, the first external 5251 * reference to the filp can only be obtained through the 5252 * i915_gem_mmap_ioctl() which safeguards us against the user 5253 * acquiring such a reference whilst we are in the middle of 5254 * freeing the object. 5255 */ 5256 return atomic_long_read(&obj->base.filp->f_count) == 1; 5257 } 5258 5259 static void __i915_gem_free_objects(struct drm_i915_private *i915, 5260 struct llist_node *freed) 5261 { 5262 struct drm_i915_gem_object *obj, *on; 5263 5264 intel_runtime_pm_get(i915); 5265 llist_for_each_entry_safe(obj, on, freed, freed) { 5266 struct i915_vma *vma, *vn; 5267 5268 trace_i915_gem_object_destroy(obj); 5269 5270 mutex_lock(&i915->drm.struct_mutex); 5271 5272 GEM_BUG_ON(i915_gem_object_is_active(obj)); 5273 list_for_each_entry_safe(vma, vn, 5274 &obj->vma_list, obj_link) { 5275 GEM_BUG_ON(i915_vma_is_active(vma)); 5276 vma->flags &= ~I915_VMA_PIN_MASK; 5277 i915_vma_destroy(vma); 5278 } 5279 GEM_BUG_ON(!list_empty(&obj->vma_list)); 5280 GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); 5281 5282 /* This serializes freeing with the shrinker. Since the free 5283 * is delayed, first by RCU then by the workqueue, we want the 5284 * shrinker to be able to free pages of unreferenced objects, 5285 * or else we may oom whilst there are plenty of deferred 5286 * freed objects. 
5287 */ 5288 if (i915_gem_object_has_pages(obj)) { 5289 spin_lock(&i915->mm.obj_lock); 5290 list_del_init(&obj->mm.link); 5291 spin_unlock(&i915->mm.obj_lock); 5292 } 5293 5294 mutex_unlock(&i915->drm.struct_mutex); 5295 5296 GEM_BUG_ON(obj->bind_count); 5297 GEM_BUG_ON(obj->userfault_count); 5298 GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); 5299 GEM_BUG_ON(!list_empty(&obj->lut_list)); 5300 5301 if (obj->ops->release) 5302 obj->ops->release(obj); 5303 5304 if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) 5305 atomic_set(&obj->mm.pages_pin_count, 0); 5306 __i915_gem_object_put_pages(obj, I915_MM_NORMAL); 5307 GEM_BUG_ON(i915_gem_object_has_pages(obj)); 5308 5309 #ifdef notyet 5310 if (obj->base.import_attach) 5311 drm_prime_gem_destroy(&obj->base, NULL); 5312 #endif 5313 5314 reservation_object_fini(&obj->__builtin_resv); 5315 drm_gem_object_release(&obj->base); 5316 i915_gem_info_remove_obj(i915, obj->base.size); 5317 5318 kfree(obj->bit_17); 5319 i915_gem_object_free(obj); 5320 5321 GEM_BUG_ON(!atomic_read(&i915->mm.free_count)); 5322 atomic_dec(&i915->mm.free_count); 5323 5324 if (on) 5325 cond_resched(); 5326 } 5327 intel_runtime_pm_put(i915); 5328 } 5329 5330 static void i915_gem_flush_free_objects(struct drm_i915_private *i915) 5331 { 5332 struct llist_node *freed; 5333 5334 /* Free the oldest, most stale object to keep the free_list short */ 5335 freed = NULL; 5336 if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */ 5337 /* Only one consumer of llist_del_first() allowed */ 5338 spin_lock(&i915->mm.free_lock); 5339 freed = llist_del_first(&i915->mm.free_list); 5340 spin_unlock(&i915->mm.free_lock); 5341 } 5342 if (unlikely(freed)) { 5343 freed->next = NULL; 5344 __i915_gem_free_objects(i915, freed); 5345 } 5346 } 5347 5348 static void __i915_gem_free_work(struct work_struct *work) 5349 { 5350 struct drm_i915_private *i915 = 5351 container_of(work, struct drm_i915_private, mm.free_work); 5352 struct llist_node *freed; 5353 5354 /* 5355 * All file-owned VMA should have been released by this point through 5356 * i915_gem_close_object(), or earlier by i915_gem_context_close(). 5357 * However, the object may also be bound into the global GTT (e.g. 5358 * older GPUs without per-process support, or for direct access through 5359 * the GTT either for the user or for scanout). Those VMA still need to 5360 * unbound now. 5361 */ 5362 5363 spin_lock(&i915->mm.free_lock); 5364 while ((freed = llist_del_all(&i915->mm.free_list))) { 5365 spin_unlock(&i915->mm.free_lock); 5366 5367 __i915_gem_free_objects(i915, freed); 5368 if (drm_need_resched()) 5369 return; 5370 5371 spin_lock(&i915->mm.free_lock); 5372 } 5373 spin_unlock(&i915->mm.free_lock); 5374 } 5375 5376 static void __i915_gem_free_object_rcu(struct rcu_head *head) 5377 { 5378 struct drm_i915_gem_object *obj = 5379 container_of(head, typeof(*obj), rcu); 5380 struct drm_i915_private *i915 = to_i915(obj->base.dev); 5381 5382 /* 5383 * Since we require blocking on struct_mutex to unbind the freed 5384 * object from the GPU before releasing resources back to the 5385 * system, we can not do that directly from the RCU callback (which may 5386 * be a softirq context), but must instead then defer that work onto a 5387 * kthread. We use the RCU callback rather than move the freed object 5388 * directly onto the work queue so that we can mix between using the 5389 * worker and performing frees directly from subsequent allocations for 5390 * crude but effective memory throttling. 
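 *
 * Note that llist_add() returns true only when it installs the first
 * node on an empty list, so the worker below is queued at most once
 * per batch of frees and then drains whatever has accumulated.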
5391 */ 5392 if (llist_add(&obj->freed, &i915->mm.free_list)) 5393 queue_work(i915->wq, &i915->mm.free_work); 5394 } 5395 5396 void i915_gem_free_object(struct drm_gem_object *gem_obj) 5397 { 5398 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); 5399 5400 if (obj->mm.quirked) 5401 __i915_gem_object_unpin_pages(obj); 5402 5403 if (discard_backing_storage(obj)) 5404 obj->mm.madv = I915_MADV_DONTNEED; 5405 5406 /* 5407 * Before we free the object, make sure any pure RCU-only 5408 * read-side critical sections are complete, e.g. 5409 * i915_gem_busy_ioctl(). For the corresponding synchronized 5410 * lookup see i915_gem_object_lookup_rcu(). 5411 */ 5412 atomic_inc(&to_i915(obj->base.dev)->mm.free_count); 5413 call_rcu(&obj->rcu, __i915_gem_free_object_rcu); 5414 } 5415 5416 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) 5417 { 5418 lockdep_assert_held(&obj->base.dev->struct_mutex); 5419 5420 if (!i915_gem_object_has_active_reference(obj) && 5421 i915_gem_object_is_active(obj)) 5422 i915_gem_object_set_active_reference(obj); 5423 else 5424 i915_gem_object_put(obj); 5425 } 5426 5427 void i915_gem_sanitize(struct drm_i915_private *i915) 5428 { 5429 int err; 5430 5431 GEM_TRACE("\n"); 5432 5433 mutex_lock(&i915->drm.struct_mutex); 5434 5435 intel_runtime_pm_get(i915); 5436 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5437 5438 /* 5439 * As we have just resumed the machine and woken the device up from 5440 * deep PCI sleep (presumably D3_cold), assume the HW has been reset 5441 * back to defaults, recovering from whatever wedged state we left it 5442 * in and so worth trying to use the device once more. 5443 */ 5444 if (i915_terminally_wedged(&i915->gpu_error)) 5445 i915_gem_unset_wedged(i915); 5446 5447 /* 5448 * If we inherit context state from the BIOS or earlier occupants 5449 * of the GPU, the GPU may be in an inconsistent state when we 5450 * try to take over. The only way to remove the earlier state 5451 * is by resetting. However, resetting on earlier gen is tricky as 5452 * it may impact the display and we are uncertain about the stability 5453 * of the reset, so this could be applied to even earlier gen. 5454 */ 5455 err = -ENODEV; 5456 if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) 5457 err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); 5458 if (!err) 5459 intel_engines_sanitize(i915); 5460 5461 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5462 intel_runtime_pm_put(i915); 5463 5464 i915_gem_contexts_lost(i915); 5465 mutex_unlock(&i915->drm.struct_mutex); 5466 } 5467 5468 int i915_gem_suspend(struct drm_i915_private *i915) 5469 { 5470 int ret; 5471 5472 GEM_TRACE("\n"); 5473 5474 intel_runtime_pm_get(i915); 5475 intel_suspend_gt_powersave(i915); 5476 5477 mutex_lock(&i915->drm.struct_mutex); 5478 5479 /* 5480 * We have to flush all the executing contexts to main memory so 5481 * that they can saved in the hibernation image. To ensure the last 5482 * context image is coherent, we have to switch away from it. That 5483 * leaves the i915->kernel_context still active when 5484 * we actually suspend, and its image in memory may not match the GPU 5485 * state. Fortunately, the kernel_context is disposable and we do 5486 * not rely on its state. 
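 *
 * The wait below therefore passes I915_WAIT_FOR_IDLE_BOOST so that
 * this final flush races to completion at boosted GPU clocks (the
 * "race-to-idle" trick applied by wait_for_timeline()).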
5487 */ 5488 if (!i915_terminally_wedged(&i915->gpu_error)) { 5489 ret = i915_gem_switch_to_kernel_context(i915); 5490 if (ret) 5491 goto err_unlock; 5492 5493 ret = i915_gem_wait_for_idle(i915, 5494 I915_WAIT_INTERRUPTIBLE | 5495 I915_WAIT_LOCKED | 5496 I915_WAIT_FOR_IDLE_BOOST, 5497 MAX_SCHEDULE_TIMEOUT); 5498 if (ret && ret != -EIO) 5499 goto err_unlock; 5500 5501 assert_kernel_context_is_current(i915); 5502 } 5503 i915_retire_requests(i915); /* ensure we flush after wedging */ 5504 5505 mutex_unlock(&i915->drm.struct_mutex); 5506 5507 intel_uc_suspend(i915); 5508 5509 cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); 5510 cancel_delayed_work_sync(&i915->gt.retire_work); 5511 5512 /* 5513 * As the idle_work is rearming if it detects a race, play safe and 5514 * repeat the flush until it is definitely idle. 5515 */ 5516 drain_delayed_work(&i915->gt.idle_work); 5517 5518 /* 5519 * Assert that we successfully flushed all the work and 5520 * reset the GPU back to its idle, low power state. 5521 */ 5522 WARN_ON(i915->gt.awake); 5523 if (WARN_ON(!intel_engines_are_idle(i915))) 5524 i915_gem_set_wedged(i915); /* no hope, discard everything */ 5525 5526 intel_runtime_pm_put(i915); 5527 return 0; 5528 5529 err_unlock: 5530 mutex_unlock(&i915->drm.struct_mutex); 5531 intel_runtime_pm_put(i915); 5532 return ret; 5533 } 5534 5535 void i915_gem_suspend_late(struct drm_i915_private *i915) 5536 { 5537 struct drm_i915_gem_object *obj; 5538 struct list_head *phases[] = { 5539 &i915->mm.unbound_list, 5540 &i915->mm.bound_list, 5541 NULL 5542 }, **phase; 5543 5544 /* 5545 * Neither the BIOS, ourselves or any other kernel 5546 * expects the system to be in execlists mode on startup, 5547 * so we need to reset the GPU back to legacy mode. And the only 5548 * known way to disable logical contexts is through a GPU reset. 5549 * 5550 * So in order to leave the system in a known default configuration, 5551 * always reset the GPU upon unload and suspend. Afterwards we then 5552 * clean up the GEM state tracking, flushing off the requests and 5553 * leaving the system in a known idle state. 5554 * 5555 * Note that is of the upmost importance that the GPU is idle and 5556 * all stray writes are flushed *before* we dismantle the backing 5557 * storage for the pinned objects. 5558 * 5559 * However, since we are uncertain that resetting the GPU on older 5560 * machines is a good idea, we don't - just in case it leaves the 5561 * machine in an unusable condition. 5562 */ 5563 5564 mutex_lock(&i915->drm.struct_mutex); 5565 for (phase = phases; *phase; phase++) { 5566 list_for_each_entry(obj, *phase, mm.link) 5567 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 5568 } 5569 mutex_unlock(&i915->drm.struct_mutex); 5570 5571 intel_uc_sanitize(i915); 5572 i915_gem_sanitize(i915); 5573 } 5574 5575 void i915_gem_resume(struct drm_i915_private *i915) 5576 { 5577 GEM_TRACE("\n"); 5578 5579 WARN_ON(i915->gt.awake); 5580 5581 mutex_lock(&i915->drm.struct_mutex); 5582 intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); 5583 5584 i915_gem_restore_gtt_mappings(i915); 5585 i915_gem_restore_fences(i915); 5586 5587 /* 5588 * As we didn't flush the kernel context before suspend, we cannot 5589 * guarantee that the context image is complete. So let's just reset 5590 * it and start again. 5591 */ 5592 i915->gt.resume(i915); 5593 5594 if (i915_gem_init_hw(i915)) 5595 goto err_wedged; 5596 5597 intel_uc_resume(i915); 5598 5599 /* Always reload a context for powersaving. 
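 * Switching back to the kernel context means no user context is left
 * pinned on the engines, so the GPU is free to park and enter its low
 * power state once it idles.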
*/ 5600 if (i915_gem_switch_to_kernel_context(i915)) 5601 goto err_wedged; 5602 5603 out_unlock: 5604 intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); 5605 mutex_unlock(&i915->drm.struct_mutex); 5606 return; 5607 5608 err_wedged: 5609 if (!i915_terminally_wedged(&i915->gpu_error)) { 5610 DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); 5611 i915_gem_set_wedged(i915); 5612 } 5613 goto out_unlock; 5614 } 5615 5616 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) 5617 { 5618 if (INTEL_GEN(dev_priv) < 5 || 5619 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) 5620 return; 5621 5622 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | 5623 DISP_TILE_SURFACE_SWIZZLING); 5624 5625 if (IS_GEN5(dev_priv)) 5626 return; 5627 5628 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 5629 if (IS_GEN6(dev_priv)) 5630 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); 5631 else if (IS_GEN7(dev_priv)) 5632 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); 5633 else if (IS_GEN8(dev_priv)) 5634 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); 5635 else 5636 BUG(); 5637 } 5638 5639 static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base) 5640 { 5641 I915_WRITE(RING_CTL(base), 0); 5642 I915_WRITE(RING_HEAD(base), 0); 5643 I915_WRITE(RING_TAIL(base), 0); 5644 I915_WRITE(RING_START(base), 0); 5645 } 5646 5647 static void init_unused_rings(struct drm_i915_private *dev_priv) 5648 { 5649 if (IS_I830(dev_priv)) { 5650 init_unused_ring(dev_priv, PRB1_BASE); 5651 init_unused_ring(dev_priv, SRB0_BASE); 5652 init_unused_ring(dev_priv, SRB1_BASE); 5653 init_unused_ring(dev_priv, SRB2_BASE); 5654 init_unused_ring(dev_priv, SRB3_BASE); 5655 } else if (IS_GEN2(dev_priv)) { 5656 init_unused_ring(dev_priv, SRB0_BASE); 5657 init_unused_ring(dev_priv, SRB1_BASE); 5658 } else if (IS_GEN3(dev_priv)) { 5659 init_unused_ring(dev_priv, PRB1_BASE); 5660 init_unused_ring(dev_priv, PRB2_BASE); 5661 } 5662 } 5663 5664 static int __i915_gem_restart_engines(void *data) 5665 { 5666 struct drm_i915_private *i915 = data; 5667 struct intel_engine_cs *engine; 5668 enum intel_engine_id id; 5669 int err; 5670 5671 for_each_engine(engine, i915, id) { 5672 err = engine->init_hw(engine); 5673 if (err) { 5674 DRM_ERROR("Failed to restart %s (%d)\n", 5675 engine->name, err); 5676 return err; 5677 } 5678 } 5679 5680 return 0; 5681 } 5682 5683 int i915_gem_init_hw(struct drm_i915_private *dev_priv) 5684 { 5685 int ret; 5686 5687 dev_priv->gt.last_init_time = ktime_get(); 5688 5689 /* Double layer security blanket, see i915_gem_init() */ 5690 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5691 5692 if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) 5693 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); 5694 5695 if (IS_HASWELL(dev_priv)) 5696 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ? 5697 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 5698 5699 if (HAS_PCH_NOP(dev_priv)) { 5700 if (IS_IVYBRIDGE(dev_priv)) { 5701 u32 temp = I915_READ(GEN7_MSG_CTL); 5702 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK); 5703 I915_WRITE(GEN7_MSG_CTL, temp); 5704 } else if (INTEL_GEN(dev_priv) >= 7) { 5705 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT); 5706 temp &= ~RESET_PCH_HANDSHAKE_ENABLE; 5707 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp); 5708 } 5709 } 5710 5711 intel_gt_workarounds_apply(dev_priv); 5712 5713 i915_gem_init_swizzling(dev_priv); 5714 5715 /* 5716 * At least 830 can leave some of the unused rings 5717 * "active" (ie. 
head != tail) after resume which 5718 * will prevent c3 entry. Makes sure all unused rings 5719 * are totally idle. 5720 */ 5721 init_unused_rings(dev_priv); 5722 5723 BUG_ON(!dev_priv->kernel_context); 5724 if (i915_terminally_wedged(&dev_priv->gpu_error)) { 5725 ret = -EIO; 5726 goto out; 5727 } 5728 5729 ret = i915_ppgtt_init_hw(dev_priv); 5730 if (ret) { 5731 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 5732 goto out; 5733 } 5734 5735 ret = intel_wopcm_init_hw(&dev_priv->wopcm); 5736 if (ret) { 5737 DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); 5738 goto out; 5739 } 5740 5741 /* We can't enable contexts until all firmware is loaded */ 5742 ret = intel_uc_init_hw(dev_priv); 5743 if (ret) { 5744 DRM_ERROR("Enabling uc failed (%d)\n", ret); 5745 goto out; 5746 } 5747 5748 intel_mocs_init_l3cc_table(dev_priv); 5749 5750 /* Only when the HW is re-initialised, can we replay the requests */ 5751 ret = __i915_gem_restart_engines(dev_priv); 5752 if (ret) 5753 goto cleanup_uc; 5754 5755 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5756 5757 return 0; 5758 5759 cleanup_uc: 5760 intel_uc_fini_hw(dev_priv); 5761 out: 5762 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5763 5764 return ret; 5765 } 5766 5767 static int __intel_engines_record_defaults(struct drm_i915_private *i915) 5768 { 5769 struct i915_gem_context *ctx; 5770 struct intel_engine_cs *engine; 5771 enum intel_engine_id id; 5772 int err; 5773 5774 /* 5775 * As we reset the gpu during very early sanitisation, the current 5776 * register state on the GPU should reflect its defaults values. 5777 * We load a context onto the hw (with restore-inhibit), then switch 5778 * over to a second context to save that default register state. We 5779 * can then prime every new context with that state so they all start 5780 * from the same default HW values. 5781 */ 5782 5783 ctx = i915_gem_context_create_kernel(i915, 0); 5784 if (IS_ERR(ctx)) 5785 return PTR_ERR(ctx); 5786 5787 for_each_engine(engine, i915, id) { 5788 struct i915_request *rq; 5789 5790 rq = i915_request_alloc(engine, ctx); 5791 if (IS_ERR(rq)) { 5792 err = PTR_ERR(rq); 5793 goto out_ctx; 5794 } 5795 5796 err = 0; 5797 if (engine->init_context) 5798 err = engine->init_context(rq); 5799 5800 i915_request_add(rq); 5801 if (err) 5802 goto err_active; 5803 } 5804 5805 err = i915_gem_switch_to_kernel_context(i915); 5806 if (err) 5807 goto err_active; 5808 5809 if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) { 5810 i915_gem_set_wedged(i915); 5811 err = -EIO; /* Caller will declare us wedged */ 5812 goto err_active; 5813 } 5814 5815 assert_kernel_context_is_current(i915); 5816 5817 for_each_engine(engine, i915, id) { 5818 struct i915_vma *state; 5819 5820 state = to_intel_context(ctx, engine)->state; 5821 if (!state) 5822 continue; 5823 5824 /* 5825 * As we will hold a reference to the logical state, it will 5826 * not be torn down with the context, and importantly the 5827 * object will hold onto its vma (making it possible for a 5828 * stray GTT write to corrupt our defaults). Unmap the vma 5829 * from the GTT to prevent such accidents and reclaim the 5830 * space. 
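 *
 * The object itself is kept alive below via i915_gem_object_get() and
 * moved to the CPU domain so that later reads of the saved default
 * image are coherent.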

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	ctx = i915_gem_context_create_kernel(i915, 0);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_alloc(engine, ctx);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ctx;
		}

		err = 0;
		if (engine->init_context)
			err = engine->init_context(rq);

		i915_request_add(rq);
		if (err)
			goto err_active;
	}

	err = i915_gem_switch_to_kernel_context(i915);
	if (err)
		goto err_active;

	if (i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED, HZ / 5)) {
		i915_gem_set_wedged(i915);
		err = -EIO; /* Caller will declare us wedged */
		goto err_active;
	}

	assert_kernel_context_is_current(i915);

	for_each_engine(engine, i915, id) {
		struct i915_vma *state;

		state = to_intel_context(ctx, engine)->state;
		if (!state)
			continue;

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto err_active;

		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		if (err)
			goto err_active;

		engine->default_state = i915_gem_object_get(state->obj);
	}

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
		unsigned int found = intel_engines_has_context_isolation(i915);

		/*
		 * Make sure that classes with multiple engine instances all
		 * share the same basic configuration.
		 */
		for_each_engine(engine, i915, id) {
			unsigned int bit = BIT(engine->uabi_class);
			unsigned int expected = engine->default_state ? bit : 0;

			if ((found & bit) != expected) {
				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
					  engine->uabi_class, engine->name);
			}
		}
	}

out_ctx:
	i915_gem_context_set_closed(ctx);
	i915_gem_context_put(ctx);
	return err;

err_active:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn down. First try to flush any remaining
	 * requests, ensure we are pointing at the kernel context and
	 * then remove it.
	 */
	if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
		goto out_ctx;

	if (WARN_ON(i915_gem_wait_for_idle(i915,
					   I915_WAIT_LOCKED,
					   MAX_SCHEDULE_TIMEOUT)))
		goto out_ctx;

	i915_gem_contexts_lost(i915);
	goto out_ctx;
}
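
/*
 * Illustrative sketch (not part of the driver): the intent of the
 * engine->default_state object captured above is to seed the image of every
 * subsequently created context. Assuming the logical-ring (execlists)
 * backend of this kernel generation, a consumer might copy it roughly like
 * this; ctx_image and the exact copy offsets are placeholders, only the
 * pin_map/unpin_map helpers are real:
 *
 *	if (engine->default_state) {
 *		void *defaults;
 *
 *		defaults = i915_gem_object_pin_map(engine->default_state,
 *						   I915_MAP_WB);
 *		if (!IS_ERR(defaults)) {
 *			memcpy(ctx_image, defaults, engine->context_size);
 *			i915_gem_object_unpin_map(engine->default_state);
 *		}
 *	}
 */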

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if the host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		dev_priv->gt.resume = intel_lr_context_resume;
		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
	} else {
		dev_priv->gt.resume = intel_legacy_submission_resume;
		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
	}

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	ret = intel_uc_init_misc(dev_priv);
	if (ret)
		return ret;

	ret = intel_wopcm_init(&dev_priv->wopcm);
	if (ret)
		goto err_uc_misc;

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = i915_gem_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	ret = intel_uc_init(dev_priv);
	if (ret)
		goto err_pm;

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/*
	 * Despite its name, intel_init_clock_gating applies both display
	 * clock gating workarounds and GT mmio workarounds, as well as the
	 * occasional GT power context workaround. Worse, sometimes it
	 * includes a context register workaround which we need to apply
	 * before we record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_init_hw;

	if (i915_inject_load_failure()) {
		ret = -ENODEV;
		goto err_init_hw;
	}

	if (i915_inject_load_failure()) {
		ret = -EIO;
		goto err_init_hw;
	}

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean "disable GPU submission but keep KMS alive". We want to
	 * mark the HW as irreversibly wedged, but keep enough state around
	 * that the driver doesn't explode during runtime.
	 */
err_init_hw:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	WARN_ON(i915_gem_suspend(dev_priv));
	i915_gem_suspend_late(dev_priv);

	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
err_uc_init:
	intel_uc_fini(dev_priv);
err_pm:
	if (ret != -EIO) {
		intel_cleanup_gt_powersave(dev_priv);
		i915_gem_cleanup_engines(dev_priv);
	}
err_context:
	if (ret != -EIO)
		i915_gem_contexts_fini(dev_priv);
err_ggtt:
err_unlock:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_uc_misc:
	intel_uc_fini_misc(dev_priv);

	if (ret != -EIO)
		i915_gem_cleanup_userptr(dev_priv);

	if (ret == -EIO) {
		/*
		 * Allow engine initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry;
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
			i915_load_error(dev_priv,
					"Failed to initialize GPU, declaring it wedged!\n");
			i915_gem_set_wedged(dev_priv);
		}
		ret = 0;
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}
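
/*
 * Illustrative sketch (not part of the driver): because i915_gem_init()
 * converts -EIO into "wedged but successfully loaded", a load-time caller
 * only has to unwind on genuine failures and may optionally check for the
 * wedged state afterwards. notify_gpu_unavailable() is hypothetical;
 * i915_gem_init() and i915_terminally_wedged() are the functions above:
 *
 *	ret = i915_gem_init(i915);
 *	if (ret)
 *		goto err_unload;		// allocation or similar failure
 *	if (i915_terminally_wedged(&i915->gpu_error))
 *		notify_gpu_unavailable();	// KMS still works, GPU does not
 */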

void i915_gem_fini(struct drm_i915_private *dev_priv)
{
	i915_gem_suspend_late(dev_priv);

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
	intel_uc_fini(dev_priv);
	i915_gem_cleanup_engines(dev_priv);
	i915_gem_contexts_fini(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_uc_fini_misc(dev_priv);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->contexts.list));
}

void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}

void
i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id)
		dev_priv->gt.cleanup_engine(engine);
}

void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
	int i;

	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
	    !IS_CHERRYVIEW(dev_priv))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_GEN(dev_priv) >= 4 ||
		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev_priv))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/* Initialize fence registers to zero */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		fence->i915 = dev_priv;
		fence->id = i;
		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
	}
	i915_gem_restore_fences(dev_priv);

	i915_gem_detect_bit_6_swizzle(dev_priv);
}
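
/*
 * Summary of the fence register counts chosen above (derived from the
 * conditionals in i915_gem_load_init_fences(), for reference only):
 *
 *	gen >= 7 (except Valleyview/Cherryview)	32 fences
 *	gen >= 4, 945G/GM, G33, Pineview	16 fences
 *	everything older			 8 fences
 *	virtualised (vGPU active)		read from vgtif_reg(avail_rs.fence_num)
 */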

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	mtx_init(&i915->mm.object_stat_lock, IPL_NONE);
	mtx_init(&i915->mm.obj_lock, IPL_NONE);
	mtx_init(&i915->mm.free_lock, IPL_NONE);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);
	INIT_LIST_HEAD(&i915->mm.userfault_list);

	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}

int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err = -ENOMEM;

#ifdef __linux__
	dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->objects)
		goto err_out;

	dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->vmas)
		goto err_objects;

	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
	if (!dev_priv->luts)
		goto err_vmas;

	dev_priv->requests = KMEM_CACHE(i915_request,
					SLAB_HWCACHE_ALIGN |
					SLAB_RECLAIM_ACCOUNT |
					SLAB_TYPESAFE_BY_RCU);
	if (!dev_priv->requests)
		goto err_luts;

	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT);
	if (!dev_priv->dependencies)
		goto err_requests;

	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
	if (!dev_priv->priorities)
		goto err_dependencies;
#else
	pool_init(&dev_priv->objects, sizeof(struct drm_i915_gem_object),
	    0, IPL_NONE, 0, "drmobj", NULL);
	pool_init(&dev_priv->vmas, sizeof(struct i915_vma),
	    0, IPL_NONE, 0, "drmvma", NULL);
	pool_init(&dev_priv->luts, sizeof(struct i915_lut_handle),
	    0, IPL_NONE, 0, "drmlut", NULL);
	pool_init(&dev_priv->requests, sizeof(struct i915_request),
	    0, IPL_TTY, 0, "drmreq", NULL);
	pool_init(&dev_priv->dependencies, sizeof(struct i915_dependency),
	    0, IPL_TTY, 0, "drmdep", NULL);
	pool_init(&dev_priv->priorities, sizeof(struct i915_priolist),
	    0, IPL_TTY, 0, "drmpri", NULL);
#endif

	INIT_LIST_HEAD(&dev_priv->gt.timelines);
	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);

	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
			  i915_gem_retire_work_handler);
	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
			  i915_gem_idle_work_handler);
	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	mtx_init(&dev_priv->fb_tracking.lock, IPL_TTY);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled (%d).\n", err);

	return 0;

#ifdef __linux__
err_dependencies:
	kmem_cache_destroy(dev_priv->dependencies);
err_requests:
	kmem_cache_destroy(dev_priv->requests);
err_luts:
	kmem_cache_destroy(dev_priv->luts);
err_vmas:
	kmem_cache_destroy(dev_priv->vmas);
err_objects:
	kmem_cache_destroy(dev_priv->objects);
err_out:
	return err;
#endif
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);
	WARN_ON(!list_empty(&dev_priv->gt.timelines));

#ifdef __linux__
	kmem_cache_destroy(dev_priv->priorities);
	kmem_cache_destroy(dev_priv->dependencies);
	kmem_cache_destroy(dev_priv->requests);
	kmem_cache_destroy(dev_priv->luts);
	kmem_cache_destroy(dev_priv->vmas);
	kmem_cache_destroy(dev_priv->objects);
#endif

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
	rcu_barrier();

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/*
	 * Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}
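
/*
 * Illustrative sketch (not part of the driver): i915_gem_freeze() and
 * i915_gem_freeze_late() are meant to be wired into the hibernation
 * callbacks, so that purgeable objects are dropped first and the remaining
 * objects are moved to the CPU domain just before the image is written.
 * The hook wrappers named below are an assumption following the usual
 * dev_pm_ops convention, not a quote of i915_drv.c:
 *
 *	.freeze = i915_pm_freeze,		// eventually calls i915_gem_freeze()
 *	.freeze_late = i915_pm_freeze_late,	// eventually calls i915_gem_freeze_late()
 */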

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try to reduce the hibernation image, we manually shrink
	 * the objects as well; see i915_gem_freeze().
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/*
	 * Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	mtx_init(&file_priv->mm.lock, IPL_NONE);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/*
	 * Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     sizeof(atomic_t) * BITS_PER_BYTE);

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}
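
/*
 * Illustrative sketch (not part of the driver): a plane update in the
 * display code hands the plane's previous and next GEM objects to
 * i915_gem_track_fb() together with the mask for that plane's frontbuffer
 * slot. old_obj, new_obj and frontbuffer_bits are placeholders for whatever
 * the caller already has at hand; either object pointer may be NULL when
 * the plane is being enabled or disabled, matching the kerneldoc above:
 *
 *	i915_gem_track_fb(old_obj, new_obj, frontbuffer_bits);
 */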

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
				 const void *data, size_t size)
{
	STUB();
	return NULL;
#ifdef notyet
	struct drm_i915_gem_object *obj;
	struct file *file;
	size_t offset;
	int err;

	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;

	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);

	file = obj->base.filp;
	offset = 0;
	do {
		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
		struct vm_page *page;
		void *pgdata, *vaddr;

		err = pagecache_write_begin(file, file->f_mapping,
					    offset, len, 0,
					    &page, &pgdata);
		if (err < 0)
			goto fail;

		vaddr = kmap(page);
		memcpy(vaddr, data, len);
		kunmap(vaddr);

		err = pagecache_write_end(file, file->f_mapping,
					  offset, len, len,
					  page, pgdata);
		if (err < 0)
			goto fail;

		size -= len;
		data += len;
		offset += len;
	} while (size);

	return obj;

fail:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
#endif
}

struct scatterlist *
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
		       unsigned int n,
		       unsigned int *offset)
{
	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
	struct scatterlist *sg;
	unsigned int idx, count;

	might_sleep();
	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	/*
	 * As we iterate forward through the sg, we record each entry in a
	 * radixtree for quick repeated (backwards) lookups. If we have seen
	 * this index previously, we will have an entry for it.
	 *
	 * Initial lookup is O(N), but this is amortized to O(1) for
	 * sequential page access (where each new request is consecutive
	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
	 * i.e. O(1) with a large constant!
	 */
	if (n < READ_ONCE(iter->sg_idx))
		goto lookup;

	mutex_lock(&iter->lock);

	/*
	 * We prefer to reuse the last sg so that repeated lookups of this
	 * (or the subsequent) sg are fast - comparing against the last
	 * sg is faster than going through the radixtree.
	 */

	sg = iter->sg_pos;
	idx = iter->sg_idx;
	count = __sg_page_count(sg);

	while (idx + count <= n) {
		unsigned long exception, i;
		int ret;

		/*
		 * If we cannot allocate and insert this entry, or the
		 * individual pages from this range, cancel updating the
		 * sg_idx so that on this lookup we are forced to linearly
		 * scan onwards, but on future lookups we will try the
		 * insertion again (in which case we need to be careful of
		 * the error return reporting that we have already inserted
		 * this index).
		 */
		ret = radix_tree_insert(&iter->radix, idx, sg);
		if (ret && ret != -EEXIST)
			goto scan;

		exception =
			RADIX_TREE_EXCEPTIONAL_ENTRY |
			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
		for (i = 1; i < count; i++) {
			ret = radix_tree_insert(&iter->radix, idx + i,
						(void *)exception);
			if (ret && ret != -EEXIST)
				goto scan;
		}

		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

scan:
	iter->sg_pos = sg;
	iter->sg_idx = idx;

	mutex_unlock(&iter->lock);

	if (unlikely(n < idx)) /* insertion completed by another thread */
		goto lookup;

	/*
	 * In case we failed to insert the entry into the radixtree, we need
	 * to look beyond the current sg.
	 */
	while (idx + count <= n) {
		idx += count;
		sg = ____sg_next(sg);
		count = __sg_page_count(sg);
	}

	*offset = n - idx;
	return sg;

lookup:
	rcu_read_lock();

	sg = radix_tree_lookup(&iter->radix, n);
	GEM_BUG_ON(!sg);

	/*
	 * If this index is in the middle of a multi-page sg entry,
	 * the radixtree will contain an exceptional entry that points
	 * to the start of that range. We will return the pointer to
	 * the base page and the offset of this page within the
	 * sg entry's range.
	 */
	*offset = 0;
	if (unlikely(radix_tree_exception(sg))) {
		unsigned long base =
			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

		sg = radix_tree_lookup(&iter->radix, base);
		GEM_BUG_ON(!sg);

		*offset = n - base;
	}

	rcu_read_unlock();

	return sg;
}

struct vm_page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
{
	struct scatterlist *sg;
	unsigned int offset;

	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return nth_page(sg_page(sg), offset);
}

/* Like i915_gem_object_get_page(), but mark the returned page dirty */
struct vm_page *
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
			       unsigned int n)
{
	struct vm_page *page;

	page = i915_gem_object_get_page(obj, n);
	if (!obj->mm.dirty)
		set_page_dirty(page);

	return page;
}

dma_addr_t
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
				unsigned long n)
{
	struct scatterlist *sg;
	unsigned int offset;

	sg = i915_gem_object_get_sg(obj, n, &offset);
	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
}
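
/*
 * Illustrative sketch (not part of the driver): the accessors above are
 * meant to be used while the object's backing pages are pinned, typically
 * to walk the backing store page by page. Variable names here are
 * placeholders; the pin/unpin and lookup helpers are the real ones:
 *
 *	unsigned long i, npages = obj->base.size >> PAGE_SHIFT;
 *
 *	err = i915_gem_object_pin_pages(obj);
 *	if (err)
 *		return err;
 *	for (i = 0; i < npages; i++) {
 *		dma_addr_t addr = i915_gem_object_get_dma_address(obj, i);
 *		// program addr into some hypothetical translation table
 *	}
 *	i915_gem_object_unpin_pages(obj);
 */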

int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
{
	struct sg_table *pages;
	int err;

	if (align > obj->base.size)
		return -EINVAL;

	if (obj->ops == &i915_gem_phys_ops)
		return 0;

	if (obj->ops != &i915_gem_object_ops)
		return -EINVAL;

	err = i915_gem_object_unbind(obj);
	if (err)
		return err;

	mutex_lock(&obj->mm.lock);

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.quirked) {
		err = -EFAULT;
		goto err_unlock;
	}

	if (obj->mm.mapping) {
		err = -EBUSY;
		goto err_unlock;
	}

	pages = __i915_gem_object_unset_pages(obj);

	obj->ops = &i915_gem_phys_ops;

	err = ____i915_gem_object_get_pages(obj);
	if (err)
		goto err_xfer;

	/* Perma-pin (until release) the physical set of pages */
	__i915_gem_object_pin_pages(obj);

	if (!IS_ERR_OR_NULL(pages))
		i915_gem_object_ops.put_pages(obj, pages);
	mutex_unlock(&obj->mm.lock);
	return 0;

err_xfer:
	obj->ops = &i915_gem_object_ops;
	if (!IS_ERR_OR_NULL(pages)) {
		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);

		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
	}
err_unlock:
	mutex_unlock(&obj->mm.lock);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/scatterlist.c"
#include "selftests/mock_gem_device.c"
#include "selftests/huge_gem_object.c"
#include "selftests/huge_pages.c"
#include "selftests/i915_gem_object.c"
#include "selftests/i915_gem_coherency.c"
#endif