/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include <drm/drm_vma_manager.h>
#include <linux/dma-fence-array.h>
#include <linux/kthread.h>
#include <linux/dma-resv.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/mman.h>

#include <dev/pci/agpvar.h>

#include "display/intel_display.h"
#include "display/intel_frontbuffer.h"

#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_mman.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_workarounds.h"

#include "i915_drv.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#include "intel_pm.h"

static int
insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->vm.mutex);
	if (err)
		return err;

	memset(node, 0, sizeof(*node));
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
	    size, 0, I915_COLOR_UNEVICTABLE,
	    0, ggtt->mappable_end,
	    DRM_MM_INSERT_LOW);

	mutex_unlock(&ggtt->vm.mutex);

	return err;
}

static void
remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node)
{
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(node);
	mutex_unlock(&ggtt->vm.mutex);
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
	struct drm_i915_gem_get_aperture *args = data;
	struct i915_vma *vma;
	u64 pinned;

	if (mutex_lock_interruptible(&ggtt->vm.mutex))
		return -EINTR;

	pinned = ggtt->vm.reserved;
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
		if (i915_vma_is_pinned(vma))
			pinned += vma->node.size;

	mutex_unlock(&ggtt->vm.mutex);

	args->aper_size = ggtt->vm.total;
	args->aper_available_size = args->aper_size - pinned;

	return 0;
}

int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
    unsigned long flags)
{
	struct intel_runtime_pm *rpm =
	    &to_i915(obj->base.dev)->runtime_pm;
	DRM_LIST_HEAD(still_in_list);
	intel_wakeref_t wakeref;
	struct i915_vma *vma;
	int ret;

	if (!atomic_read(&obj->bind_count))
		return 0;

	/*
	 * As some machines use ACPI to handle runtime-resume callbacks, and
	 * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
	 * as they are required by the shrinker. Ergo, we wake the device up
	 * first just in case.
	 */
	wakeref = intel_runtime_pm_get(rpm);

try_again:
	ret = 0;
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
	    struct i915_vma,
	    obj_link))) {
		struct i915_address_space *vm = vma->vm;

		list_move_tail(&vma->obj_link, &still_in_list);
		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
			continue;

		ret = -EAGAIN;
		if (!i915_vm_tryopen(vm))
			break;

		/* Prevent vma being freed by i915_vma_parked as we unbind */
		vma = __i915_vma_get(vma);
		spin_unlock(&obj->vma.lock);

		if (vma) {
			ret = -EBUSY;
			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
			    !i915_vma_is_active(vma))
				ret = i915_vma_unbind(vma);

			__i915_vma_put(vma);
		}

		i915_vm_close(vm);
		spin_lock(&obj->vma.lock);
	}
	list_splice_init(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);

	if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_BARRIER) {
		rcu_barrier(); /* flush the i915_vm_release() */
		goto try_again;
	}

	intel_runtime_pm_put(rpm, wakeref);

	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
    struct drm_i915_gem_pwrite *args,
    struct drm_file *file)
{
	void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
	char __user *user_data = u64_to_user_ptr(args->data_ptr);

	/*
	 * We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;

	drm_clflush_virt_range(vaddr, args->size);
	intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);

	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
	return 0;
}

static int
i915_gem_create(struct drm_file *file,
    struct intel_memory_region *mr,
    u64 *size_p,
    u32 *handle_p)
{
	struct drm_i915_gem_object *obj;
	u32 handle;
	u64 size;
	int ret;

	GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
	size = round_up(*size_p, mr->min_page_size);
	if (size == 0)
		return -EINVAL;

	/* For most of the ABI (e.g.
	 * mmap) we think in system pages */
	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));

	/* Allocate the new object */
	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	ret = drm_gem_handle_create(file, &obj->base, &handle);
	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	*handle_p = handle;
	*size_p = size;
	return 0;
}

int
i915_gem_dumb_create(struct drm_file *file,
    struct drm_device *dev,
    struct drm_mode_create_dumb *args)
{
	enum intel_memory_type mem_type;
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

	/* have to work out size/pitch and return them */
	args->pitch = roundup2(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
	    DRM_FORMAT_MOD_LINEAR))
		args->pitch = roundup2(args->pitch, 4096);

	if (args->pitch < args->width)
		return -EINVAL;

	args->size = mul_u32_u32(args->pitch, args->height);

	mem_type = INTEL_MEMORY_SYSTEM;
	if (HAS_LMEM(to_i915(dev)))
		mem_type = INTEL_MEMORY_LOCAL;

	return i915_gem_create(file,
	    intel_memory_region_by_type(to_i915(dev), mem_type),
	    &args->size, &args->handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_create *args = data;

	i915_gem_flush_free_objects(i915);

	return i915_gem_create(file,
	    intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM),
	    &args->size, &args->handle);
}

static int
shmem_pread(struct vm_page *page, int offset, int len, char __user *user_data,
    bool needs_clflush)
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);

	ret = __copy_to_user(user_data, vaddr + offset, len);

	kunmap_va(vaddr);

	return ret ?
	    -EFAULT : 0;
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
    struct drm_i915_gem_pread *args)
{
	unsigned int needs_clflush;
	unsigned int idx, offset;
	struct dma_fence *fence;
	char __user *user_data;
	u64 remain;
	int ret;

	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (ret)
		return ret;

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_finish_access(obj);
	if (!fence)
		return -ENOMEM;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct vm_page *page = i915_gem_object_get_page(obj, idx);
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);

		ret = shmem_pread(page, offset, length, user_data,
		    needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

	i915_gem_object_unlock_fence(obj, fence);
	return ret;
}

#ifdef __linux__
static inline bool
gtt_user_read(struct io_mapping *mapping,
    loff_t base, int offset,
    char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
	    (void __force *)vaddr + offset,
	    length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
		    (void __force *)vaddr + offset,
		    length);
		io_mapping_unmap(vaddr);
	}
	return unwritten;
}
#else
static inline bool
gtt_user_read(struct drm_i915_private *dev_priv,
    loff_t base, int offset,
    char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86.
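	 * If the non-faulting atomic copy below drops any bytes, we fall
	 * back to a regular (fault-capable) subregion mapping and retry
	 * with copy_to_user().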
	 */
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_to_user_inatomic(user_data,
	    (void __force *)vaddr + offset,
	    length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_to_user(user_data,
		    (void __force *)vaddr + offset,
		    length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}
	return unwritten;
}
#endif

static int
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
    const struct drm_i915_gem_pread *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	void __user *user_data;
	struct i915_vma *vma;
	u64 remain, offset;
	int ret;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
		    PIN_MAPPABLE |
		    PIN_NONBLOCK /* NOWARN */ |
		    PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
#ifdef notyet
		node.flags = 0;
#else
		node.hole_follows = 0;
		node.allocated = 0;
		node.scanned_block = 0;
#endif
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			ggtt->vm.insert_page(&ggtt->vm,
			    i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
			    node.start, I915_CACHE_NONE, 0);
		} else {
			page_base += offset & ~PAGE_MASK;
		}

		if (gtt_user_read(i915, page_base, page_offset,
		    user_data, page_length)) {
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

	i915_gem_object_unlock_fence(obj, fence);
out_unpin:
	if (drm_mm_node_allocated(&node)) {
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
		remove_mappable_node(ggtt, &node);
	} else {
		i915_vma_unpin(vma);
	}
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	return ret;
}

/**
 * Reads data from the object referenced by handle.
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
 *
 * On error, the contents of *data are undefined.
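 *
 * Returns 0 on success, or a negative error code on failure.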
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr),
	    args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check source. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto out;
	}

	trace_i915_gem_object_pread(obj, args->offset, args->size);

	ret = i915_gem_object_wait(obj,
	    I915_WAIT_INTERRUPTIBLE,
	    MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto out;

	ret = i915_gem_shmem_pread(obj, args);
	if (ret == -EFAULT || ret == -ENODEV)
		ret = i915_gem_gtt_pread(obj, args);

	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */
#ifdef __linux__
static inline bool
ggtt_write(struct io_mapping *mapping,
    loff_t base, int offset,
    char __user *user_data, int length)
{
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
	    user_data, length);
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
		    user_data, length);
		io_mapping_unmap(vaddr);
	}

	return unwritten;
}
#else
static inline bool
ggtt_write(struct drm_i915_private *dev_priv,
    loff_t base, int offset,
    char __user *user_data, int length)
{
	bus_space_handle_t bsh;
	void __iomem *vaddr;
	unsigned long unwritten;

	/* We can use the cpu mem copy function because this is X86. */
	agp_map_atomic(dev_priv->agph, base, &bsh);
	vaddr = bus_space_vaddr(dev_priv->bst, bsh);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
	    user_data, length);
	agp_unmap_atomic(dev_priv->agph, bsh);
	if (unwritten) {
		agp_map_subregion(dev_priv->agph, base, PAGE_SIZE, &bsh);
		vaddr = bus_space_vaddr(dev_priv->bst, bsh);
		unwritten = copy_from_user((void __force *)vaddr + offset,
		    user_data, length);
		agp_unmap_subregion(dev_priv->agph, bsh, PAGE_SIZE);
	}

	return unwritten;
}
#endif

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 * @obj: i915 GEM object
 * @args: pwrite arguments structure
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
    const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct intel_runtime_pm *rpm = &i915->runtime_pm;
	intel_wakeref_t wakeref;
	struct drm_mm_node node;
	struct dma_fence *fence;
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
	int ret;

	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
		wakeref = intel_runtime_pm_get_if_in_use(rpm);
		if (!wakeref)
			return -EFAULT;
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
		wakeref = intel_runtime_pm_get(rpm);
	}

	vma = ERR_PTR(-ENODEV);
	if (!i915_gem_object_is_tiled(obj))
		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
		    PIN_MAPPABLE |
		    PIN_NONBLOCK /* NOWARN */ |
		    PIN_NOEVICT);
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
#ifdef notyet
		node.flags = 0;
#else
		node.hole_follows = 0;
		node.allocated = 0;
		node.scanned_block = 0;
#endif
	} else {
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
		if (ret)
			goto out_rpm;
		GEM_BUG_ON(!drm_mm_node_allocated(&node));
	}

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_unlock(obj);
		goto out_unpin;
	}

	fence = i915_gem_object_lock_fence(obj);
	i915_gem_object_unlock(obj);
	if (!fence) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (drm_mm_node_allocated(&node)) {
			/* flush the write before we modify the GGTT */
			intel_gt_flush_ggtt_writes(ggtt->vm.gt);
			ggtt->vm.insert_page(&ggtt->vm,
			    i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
			    node.start, I915_CACHE_NONE, 0);
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & ~PAGE_MASK;
		}
		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
735 */ 736 if (ggtt_write(i915, page_base, page_offset, 737 user_data, page_length)) { 738 ret = -EFAULT; 739 break; 740 } 741 742 remain -= page_length; 743 user_data += page_length; 744 offset += page_length; 745 } 746 747 intel_gt_flush_ggtt_writes(ggtt->vm.gt); 748 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); 749 750 i915_gem_object_unlock_fence(obj, fence); 751 out_unpin: 752 if (drm_mm_node_allocated(&node)) { 753 ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); 754 remove_mappable_node(ggtt, &node); 755 } else { 756 i915_vma_unpin(vma); 757 } 758 out_rpm: 759 intel_runtime_pm_put(rpm, wakeref); 760 return ret; 761 } 762 763 /* Per-page copy function for the shmem pwrite fastpath. 764 * Flushes invalid cachelines before writing to the target if 765 * needs_clflush_before is set and flushes out any written cachelines after 766 * writing if needs_clflush is set. 767 */ 768 static int 769 shmem_pwrite(struct vm_page *page, int offset, int len, char __user *user_data, 770 bool needs_clflush_before, 771 bool needs_clflush_after) 772 { 773 char *vaddr; 774 int ret; 775 776 vaddr = kmap(page); 777 778 if (needs_clflush_before) 779 drm_clflush_virt_range(vaddr + offset, len); 780 781 ret = __copy_from_user(vaddr + offset, user_data, len); 782 if (!ret && needs_clflush_after) 783 drm_clflush_virt_range(vaddr + offset, len); 784 785 kunmap_va(vaddr); 786 787 return ret ? -EFAULT : 0; 788 } 789 790 static int 791 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, 792 const struct drm_i915_gem_pwrite *args) 793 { 794 unsigned int partial_cacheline_write; 795 unsigned int needs_clflush; 796 unsigned int offset, idx; 797 struct dma_fence *fence; 798 void __user *user_data; 799 u64 remain; 800 int ret; 801 802 ret = i915_gem_object_prepare_write(obj, &needs_clflush); 803 if (ret) 804 return ret; 805 806 fence = i915_gem_object_lock_fence(obj); 807 i915_gem_object_finish_access(obj); 808 if (!fence) 809 return -ENOMEM; 810 811 /* If we don't overwrite a cacheline completely we need to be 812 * careful to have up-to-date data by first clflushing. Don't 813 * overcomplicate things and flush the entire patch. 814 */ 815 partial_cacheline_write = 0; 816 if (needs_clflush & CLFLUSH_BEFORE) 817 partial_cacheline_write = curcpu()->ci_cflushsz - 1; 818 819 user_data = u64_to_user_ptr(args->data_ptr); 820 remain = args->size; 821 offset = offset_in_page(args->offset); 822 for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { 823 struct vm_page *page = i915_gem_object_get_page(obj, idx); 824 unsigned int length = min_t(u64, remain, PAGE_SIZE - offset); 825 826 ret = shmem_pwrite(page, offset, length, user_data, 827 (offset | length) & partial_cacheline_write, 828 needs_clflush & CLFLUSH_AFTER); 829 if (ret) 830 break; 831 832 remain -= length; 833 user_data += length; 834 offset = 0; 835 } 836 837 i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); 838 i915_gem_object_unlock_fence(obj, fence); 839 840 return ret; 841 } 842 843 /** 844 * Writes data to the object referenced by handle. 845 * @dev: drm device 846 * @data: ioctl data blob 847 * @file: drm file 848 * 849 * On error, the contents of the buffer that were to be modified are undefined. 
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_i915_gem_object *obj;
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
		return -EFAULT;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/* Bounds check destination. */
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
		ret = -EINVAL;
		goto err;
	}

	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

	ret = i915_gem_object_wait(obj,
	    I915_WAIT_INTERRUPTIBLE |
	    I915_WAIT_ALL,
	    MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err;

	ret = -EFAULT;
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (!i915_gem_object_has_struct_page(obj) ||
	    cpu_write_needs_clflush(obj))
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case.
		 */
		ret = i915_gem_gtt_pwrite_fast(obj, args);

	if (ret == -EFAULT || ret == -ENOSPC) {
		if (i915_gem_object_has_struct_page(obj))
			ret = i915_gem_shmem_pwrite(obj, args);
		else
			ret = i915_gem_phys_pwrite(obj, args, file);
	}

	i915_gem_object_unpin_pages(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

	/* Pinned buffers may be scanout, so flush the cache */
	i915_gem_object_flush_if_display(obj);
	i915_gem_object_put(obj);

	return 0;
}

void i915_gem_runtime_suspend(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj, *on;
	int i;

	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */

	list_for_each_entry_safe(obj, on,
	    &i915->ggtt.userfault_list, userfault_link)
		__i915_gem_object_release_mmap_gtt(obj);

	/*
	 * The fence will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down!
	 * All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];

		/*
		 * Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */

		if (!reg->vma)
			continue;

		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
		reg->dirty = true;
	}
}

struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
    const struct i915_ggtt_view *view,
    u64 size,
    u64 alignment,
    u64 flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_vma *vma;
	int ret;

	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
		/*
		 * If the required space is larger than the available
		 * aperture, we will not be able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > ggtt->mappable_end)
			return ERR_PTR(-E2BIG);

		/*
		 * If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would be to search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
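		 *
		 * For example, with a 256MiB mappable aperture any object
		 * larger than 128MiB is refused here under PIN_NONBLOCK and
		 * the caller has to use its non-mappable fallback instead.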
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > ggtt->mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

	vma = i915_vma_instance(obj, &ggtt->vm, view);
	if (IS_ERR(vma))
		return vma;

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);

			if (flags & PIN_MAPPABLE &&
			    vma->fence_size > ggtt->mappable_end / 2)
				return ERR_PTR(-ENOSPC);
		}

		ret = i915_vma_unbind(vma);
		if (ret)
			return ERR_PTR(ret);
	}

	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
		mutex_lock(&ggtt->vm.mutex);
		ret = i915_vma_revoke_fence(vma);
		mutex_unlock(&ggtt->vm.mutex);
		if (ret)
			return ERR_PTR(ret);
	}

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);

	ret = i915_vma_wait_for_bind(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ERR_PTR(ret);
	}

	return vma;
}

int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
    struct drm_file *file_priv)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	int err;

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file_priv, args->handle);
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;

	if (i915_gem_object_has_pages(obj) &&
	    i915_gem_object_is_tiled(obj) &&
	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
			__i915_gem_object_unpin_pages(obj);
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(obj->mm.quirked);
			__i915_gem_object_pin_pages(obj);
			obj->mm.quirked = true;
		}
	}

	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;

	if (i915_gem_object_has_pages(obj)) {
		struct list_head *list;

		if (i915_gem_object_is_shrinkable(obj)) {
			unsigned long flags;

			spin_lock_irqsave(&i915->mm.obj_lock, flags);

			if (obj->mm.madv != I915_MADV_WILLNEED)
				list = &i915->mm.purge_list;
			else
				list = &i915->mm.shrink_list;
			list_move_tail(&obj->mm.link, list);

			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
		}
	}

	/* if the object is no longer attached, discard its backing storage */
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
		i915_gem_object_truncate(obj);

	args->retained = obj->mm.madv != __I915_MADV_PURGED;
	mutex_unlock(&obj->mm.lock);

out:
	i915_gem_object_put(obj);
	return err;
}

int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt.
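	 * When running as a vGPU guest without huge GTT support, the
	 * advertised page sizes are clamped to I915_GTT_PAGE_SIZE_4K below.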
	 */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
		    I915_GTT_PAGE_SIZE_4K;

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
	intel_wopcm_init(&dev_priv->wopcm);

	ret = i915_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	/*
	 * Despite its name intel_init_clock_gating applies both display
	 * clock gating workarounds and GT mmio workarounds, plus the
	 * occasional GT power context workaround. Worse, sometimes it
	 * includes a context register workaround which we need to apply
	 * before we record the default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_gt_init(&dev_priv->gt);
	if (ret)
		goto err_unlock;

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that
	 * the driver doesn't explode during runtime.
	 */
err_unlock:
	i915_gem_drain_workqueue(dev_priv);

	if (ret != -EIO) {
		intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
		i915_gem_cleanup_userptr(dev_priv);
	}

	if (ret == -EIO) {
		/*
		 * Allow engines or uC initialisation to fail by marking the GPU
		 * as wedged. But we only want to do this when the GPU is angry,
		 * for all other failures, such as an allocation failure, bail.
		 */
		if (!intel_gt_is_wedged(&dev_priv->gt)) {
			i915_probe_error(dev_priv,
			    "Failed to initialize GPU, declaring it wedged!\n");
			intel_gt_set_wedged(&dev_priv->gt);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_ggtt_resume(&dev_priv->ggtt);
		i915_gem_restore_fences(&dev_priv->ggtt);
		intel_init_clock_gating(dev_priv);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

void i915_gem_driver_register(struct drm_i915_private *i915)
{
	i915_gem_driver_register__shrinker(i915);

	intel_engines_driver_register(i915);
}

void i915_gem_driver_unregister(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
}

void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_gt_driver_remove(&dev_priv->gt);
	dev_priv->uabi_engines = RB_ROOT;

	/* Flush any outstanding unpin_work.
	 */
	i915_gem_drain_workqueue(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);
}

void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
	i915_gem_driver_release__contexts(dev_priv);

	intel_gt_driver_release(&dev_priv->gt);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
	i915_gem_cleanup_userptr(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list));
}

static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	mtx_init(&i915->mm.obj_lock, IPL_NONE);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.purge_list);
	INIT_LIST_HEAD(&i915->mm.shrink_list);

	i915_gem_init__objects(i915);
}

void i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	i915_gem_init__mm(dev_priv);
	i915_gem_init__contexts(dev_priv);

	mtx_init(&dev_priv->fb_tracking.lock, IPL_NONE);
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	i915_gem_shrink(i915, -1UL, NULL, ~0);
	i915_gem_drain_freed_objects(i915);

	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
		i915_gem_object_lock(obj);
		drm_WARN_ON(&i915->drm,
		    i915_gem_object_set_to_cpu_domain(obj, true));
		i915_gem_object_unlock(obj);
	}

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
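	 * The requests themselves are not cancelled; they simply lose
	 * their back-pointer to this client.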
1362 */ 1363 spin_lock(&file_priv->mm.lock); 1364 list_for_each_entry(request, &file_priv->mm.request_list, client_link) 1365 request->file_priv = NULL; 1366 spin_unlock(&file_priv->mm.lock); 1367 } 1368 1369 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) 1370 { 1371 struct drm_i915_file_private *file_priv; 1372 int ret; 1373 1374 DRM_DEBUG("\n"); 1375 1376 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); 1377 if (!file_priv) 1378 return -ENOMEM; 1379 1380 file->driver_priv = file_priv; 1381 file_priv->dev_priv = i915; 1382 file_priv->file = file; 1383 1384 mtx_init(&file_priv->mm.lock, IPL_NONE); 1385 INIT_LIST_HEAD(&file_priv->mm.request_list); 1386 1387 file_priv->bsd_engine = -1; 1388 file_priv->hang_timestamp = jiffies; 1389 1390 ret = i915_gem_context_open(i915, file); 1391 if (ret) 1392 kfree(file_priv); 1393 1394 return ret; 1395 } 1396 1397 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1398 #include "selftests/mock_gem_device.c" 1399 #include "selftests/i915_gem.c" 1400 #endif 1401