/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include <dev/pci/agpvar.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
					  size, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}
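
/*
 * Unbind every VMA currently bound for @obj. Bound VMAs are moved onto a
 * private list while being unbound and restored to obj->vma.list afterwards.
 * With I915_GEM_OBJECT_UNBIND_ACTIVE set, still-active VMAs are unbound as
 * well instead of returning -EBUSY; with I915_GEM_OBJECT_UNBIND_BARRIER set,
 * an -EAGAIN from a closing vm is retried after an rcu_barrier().
 */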
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
			   unsigned long flags)
{
	struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
	DRM_LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (!atomic_read(&obj->bind_count))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}
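
/*
 * pwrite backend for objects without a shmem backing store (see
 * i915_gem_pwrite_ioctl): copy the user data straight into the contiguous
 * backing pages, clflush the written range and flush the chipset, wrapped
 * in a frontbuffer invalidate/flush pair.
 */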
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file)
{
	void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
		struct intel_memory_region *mr,
		u64 *size_p,
		u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g. mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}
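
/*
 * Create a dumb buffer suitable for scanout: pick a pixel format from
 * args->bpp, pad the pitch to 64 bytes (and to 4KiB when it exceeds the
 * maximum linear fb stride, so the buffer can be remapped), and allocate
 * the object from local memory when the device has LMEM, otherwise from
 * system memory.
 */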
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = roundup2(args->pitch, 4096);

	if (args->pitch < args->width)
		return -EINVAL;

	args->size = mul_u32_u32(args->pitch, args->height);

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
			       intel_memory_region_by_type(to_i915(dev),
							   mem_type),
			       &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
			       intel_memory_region_by_type(i915,
							   INTEL_MEMORY_SYSTEM),
			       &args->size, &args->handle);
}
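
/* Per-page copy function for the shmem pread fastpath.
 * kmaps the page, flushes the range first if the object is not coherent
 * with the CPU cache, then copies the data out to userspace.
 */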
static int
shmem_pread(struct vm_page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap_va(vaddr);

	return ret ? -EFAULT : 0;
}
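
/*
 * Pread backend that goes through the CPU mapping of the backing pages:
 * prepare the object for reading (which reports whether a clflush is
 * needed), install a fence for the access, then copy out page by page
 * with shmem_pread().
 */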
static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct vm_page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

#ifdef __linux__
static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
#else
static inline bool
gtt_user_read(struct drm_i915_private *dev_priv,
	      loff_t base, int offset,
	      char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}
	return unwritten;
}
#endif
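
/*
 * Pread backend that reads through a GGTT mapping of the object: pin the
 * object into the mappable aperture if possible, otherwise borrow a single
 * page-sized GGTT slot and rewrite its PTE for each page as we go, copying
 * out through the aperture with gtt_user_read().
 */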
static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & ~PAGE_MASK;
		}

		if (gtt_user_read(i915, page_base, page_offset,
				  user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
		       args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */
#ifdef __linux__
static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}
#else
static inline bool
ggtt_write(struct drm_i915_private *dev_priv,
	   loff_t base, int offset,
	   char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
						      user_data, length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}

	return unwritten;
}
#endif

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fall back, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
					       PIN_MAPPABLE |
					       PIN_NONBLOCK /* NOWARN */ |
					       PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.flags = 0;
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & ~PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page faults.
		 */
		if (ggtt_write(i915, page_base, page_offset,
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	intel_gt_flush_ggtt_writes(ggtt->vm.gt);
	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(rpm, wakeref);
	return ret;
}

/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
static int
shmem_pwrite(struct vm_page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);

	kunmap_va(vaddr);

	return ret ? -EFAULT : 0;
}
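
/*
 * Pwrite backend that writes through the CPU mapping of the backing pages:
 * prepare the object for the write (which reports the clflush requirements),
 * install a fence, then copy in page by page with shmem_pwrite(), flushing
 * partially written cachelines before the copy when required.
 */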
static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	unsigned int partial_cacheline_write;
	unsigned int needs_clflush;
	unsigned int offset, idx;
	struct dma_fence *fence;
	void __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire cacheline.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = curcpu()->ci_cflushsz - 1;

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct vm_page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	i915_gem_object_unlock_fence(obj, fence);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fall back to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
		else
			ret = i915_gem_phys_pwrite(obj, args, file);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}
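
/*
 * Called from runtime suspend: revoke any GTT mmaps still on the
 * userfault_list and mark the fence registers that still hold a vma as
 * dirty, since their contents will be lost while the device is powered
 * down (see the comments below).
 */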
void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
				 &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fences will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}
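
/*
 * Pin @obj into the global GTT, instantiating a (view of a) VMA for it and
 * unbinding a misplaced instance if necessary. With PIN_MAPPABLE and
 * PIN_NONBLOCK the caller is expected to have a fallback path, so objects
 * that would not comfortably fit in the mappable aperture are rejected
 * early with -E2BIG or -ENOSPC.
 */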
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
			 u64 size,
			 u64 alignment,
			 u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to do a search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		ret = i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	ret = i915_vma_wait_for_bind(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ERR_PTR(ret);
	}

	return vma;
}
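
/*
 * I915_GEM_MADVISE ioctl: mark an object's backing storage as needed
 * (WILLNEED) or discardable (DONTNEED), adjusting the quirked page pin for
 * tiled objects on QUIRK_PIN_SWIZZLED_PAGES platforms and moving the object
 * between the shrink and purge lists. Reports back whether the pages are
 * still retained.
 */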
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}
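
/*
 * One-time GEM initialisation during driver load: set up userptr support,
 * fetch uC firmware, initialise the GGTT and the GT. On failure everything
 * is unwound; an -EIO from GT init instead wedges the GPU and keeps a
 * minimal setup alive for KMS (see the unwind comment below).
 */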
int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fall back to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name, intel_init_clock_gating applies both display
	 * clock gating workarounds and GT mmio workarounds, and the
	 * occasional GT power context workaround. Worse, sometimes it
	 * includes a context register workaround which we need to apply
	 * before we record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by that we want to handle -EIO to mean
	 * disable GPU submission but keep KMS alive. We want to mark the
	 * HW as irreversibly wedged, but keep enough state around that the
	 * driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the GPU
		 * as wedged. But we only want to do this when the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
					 "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_ggtt_resume(&dev_priv->ggtt);
		i915_gem_restore_fences(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}
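
/*
 * Final GEM teardown after the GT has been removed: release contexts and GT
 * state, free the firmware and userptr machinery, and verify that no GEM
 * contexts or freed objects are left behind.
 */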
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	mtx_init(&i915->mm.obj_lock, IPL_NONE);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	mtx_init(&dev_priv->fb_tracking.lock, IPL_NONE);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		drm_WARN_ON(&i915->drm,
			    i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}
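
/*
 * Set up per-client GEM state when a drm_file is opened: allocate the
 * file_priv, initialise its request list and lock, and open a default GEM
 * context for the client.
 */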
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	mtx_init(&file_priv->mm.lock, IPL_NONE);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif