/*	$NetBSD: intel_ggtt.c,v 1.19 2025/01/26 18:23:56 riastradh Exp $	*/

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_ggtt.c,v 1.19 2025/01/26 18:23:56 riastradh Exp $");

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#ifdef __NetBSD__
#include <drm/io-mapping.h>
#endif

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"

#include <linux/nbsd-namespace.h>

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
#ifdef __NetBSD__
		if (!drm_io_mapping_init_wc(&i915->drm, &ggtt->iomap,
			ggtt->gmadr.start, ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}
		/*
		 * Note: mappable_end is the size, not end paddr, of
		 * the aperture.
		 */
		pmap_pv_track(ggtt->gmadr.start, ggtt->mappable_end);
#else
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}
#endif

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	i915_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

#ifndef __NetBSD__
	stash_init(&i915->mm.wc_stash);
#endif

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(&i915->ggtt);
	if (ret)
		return ret;

	return 0;
}

/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
	/*
	 * Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
}

static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	/*
	 * Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_GEN(i915) < 6)
		return;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	ggtt->invalidate(ggtt);
}

void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
{
	ggtt_suspend_mappings(&i915->ggtt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (INTEL_GEN(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

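/*
 * NetBSD maps the GGTT PTE area with bus_space rather than exposing a
 * flat __iomem pointer, so the PTE writers below take the bus space
 * tag/handle stored in ggtt->gsmt/gsmh plus a PTE index instead of a
 * pointer into the table.
 */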
#ifdef __NetBSD__
static inline void
gen8_set_pte(bus_space_tag_t bst, bus_space_handle_t bsh, unsigned i,
    gen8_pte_t pte)
{
	CTASSERT(_BYTE_ORDER == _LITTLE_ENDIAN); /* x86 */
	CTASSERT(sizeof(gen8_pte_t) == 8);
#ifdef _LP64		/* XXX How to detect bus_space_write_8?  */
	bus_space_write_8(bst, bsh, 8*i, pte);
#else
	bus_space_write_4(bst, bsh, 8*i, (uint32_t)pte);
	bus_space_write_4(bst, bsh, 8*i + 4, (uint32_t)(pte >> 32));
#endif
}
#else
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}
#endif

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifndef __NetBSD__
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
#endif

#ifdef __NetBSD__
	gen8_set_pte(ggtt->gsmt, ggtt->gsmh, offset / I915_GTT_PAGE_SIZE,
	    gen8_pte_encode(addr, level, 0));
#else
	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
#endif

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifdef __NetBSD__
	bus_dmamap_t map = vma->pages->sgl[0].sg_dmamap;
	unsigned seg;
	unsigned pgno;
#else
	struct sgt_iter sgt_iter;
	gen8_pte_t __iomem *gtt_entries;
#endif
	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

#ifdef __NetBSD__
	pgno = vma->node.start / I915_GTT_PAGE_SIZE;
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		addr = map->dm_segs[seg].ds_addr;
		bus_size_t len = map->dm_segs[seg].ds_len;
		KASSERT((addr % I915_GTT_PAGE_SIZE) == 0);
		KASSERT((len % I915_GTT_PAGE_SIZE) == 0);
		for (;
		     len >= I915_GTT_PAGE_SIZE;
		     addr += I915_GTT_PAGE_SIZE, len -= I915_GTT_PAGE_SIZE) {
			gen8_set_pte(ggtt->gsmt, ggtt->gsmh, pgno++,
			    pte_encode | addr);
		}
		KASSERT(len == 0);
	}
#else
	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
		gen8_set_pte(gtt_entries++, pte_encode | addr);
#endif

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
#ifndef __NetBSD__
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
#endif

#ifdef __NetBSD__
	CTASSERT(sizeof(gen6_pte_t) == 4);
	bus_space_write_4(ggtt->gsmt, ggtt->gsmh,
	    sizeof(gen6_pte_t) * (offset / I915_GTT_PAGE_SIZE),
	    vm->pte_encode(addr, level, flags));
#else
	iowrite32(vm->pte_encode(addr, level, flags), pte);
#endif

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
319 */ 320 321 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 322 struct i915_vma *vma, 323 enum i915_cache_level level, 324 u32 flags) 325 { 326 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 327 #ifdef __NetBSD__ 328 bus_dmamap_t map = vma->pages->sgl[0].sg_dmamap; 329 unsigned seg; 330 unsigned pgno; 331 #else 332 gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; 333 unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; 334 struct sgt_iter iter; 335 #endif 336 dma_addr_t addr; 337 338 #ifdef __NetBSD__ 339 pgno = vma->node.start >> PAGE_SHIFT; 340 for (seg = 0; seg < map->dm_nsegs; seg++) { 341 addr = map->dm_segs[seg].ds_addr; 342 bus_size_t len = map->dm_segs[seg].ds_len; 343 KASSERT((addr % I915_GTT_PAGE_SIZE) == 0); 344 KASSERT((len % I915_GTT_PAGE_SIZE) == 0); 345 for (; 346 len >= I915_GTT_PAGE_SIZE; 347 addr += I915_GTT_PAGE_SIZE, len -= I915_GTT_PAGE_SIZE) { 348 /* XXX KASSERT(pgno < ...)? */ 349 CTASSERT(sizeof(gen6_pte_t) == 4); 350 bus_space_write_4(ggtt->gsmt, ggtt->gsmh, 351 sizeof(gen6_pte_t) * pgno++, 352 vm->pte_encode(addr, level, flags)); 353 } 354 KASSERT(len == 0); 355 /* XXX KASSERT(pgno <= ...)? */ 356 } 357 #else 358 for_each_sgt_daddr(addr, iter, vma->pages) 359 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); 360 #endif 361 362 /* 363 * We want to flush the TLBs only after we're certain all the PTE 364 * updates have finished. 365 */ 366 ggtt->invalidate(ggtt); 367 } 368 369 static void nop_clear_range(struct i915_address_space *vm, 370 u64 start, u64 length) 371 { 372 } 373 374 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 375 u64 start, u64 length) 376 { 377 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 378 unsigned int first_entry = start / I915_GTT_PAGE_SIZE; 379 unsigned int num_entries = length / I915_GTT_PAGE_SIZE; 380 const gen8_pte_t scratch_pte = vm->scratch[0].encode; 381 #ifndef __NetBSD__ 382 gen8_pte_t __iomem *gtt_base = 383 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 384 #endif 385 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 386 int i; 387 388 if (WARN(num_entries > max_entries, 389 "First entry = %d; Num entries = %d (max=%d)\n", 390 first_entry, num_entries, max_entries)) 391 num_entries = max_entries; 392 393 #ifdef __NetBSD__ 394 for (i = 0; i < num_entries; i++) 395 gen8_set_pte(ggtt->gsmt, ggtt->gsmh, first_entry + i, 396 scratch_pte); 397 #else 398 for (i = 0; i < num_entries; i++) 399 gen8_set_pte(>t_base[i], scratch_pte); 400 #endif 401 } 402 403 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) 404 { 405 /* 406 * Make sure the internal GAM fifo has been cleared of all GTT 407 * writes before exiting stop_machine(). This guarantees that 408 * any aperture accesses waiting to start in another process 409 * cannot back up behind the GTT writes causing a hang. 410 * The register can be any arbitrary GAM register. 
struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

struct clear_range {
	struct i915_address_space *vm;
	u64 start;
	u64 length;
};

static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
{
	struct clear_range *arg = _arg;

	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
					  u64 start,
					  u64 length)
{
	struct clear_range arg = { vm, start, length };

	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
#ifdef __NetBSD__
	gen6_pte_t scratch_pte;
#else
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
#endif
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0].encode;
#ifdef __NetBSD__
	CTASSERT(sizeof(gen6_pte_t) == 4);
	for (i = 0; i < num_entries; i++)
		bus_space_write_4(ggtt->gsmt, ggtt->gsmh,
		    sizeof(gen6_pte_t) * (first_entry + i),
		    scratch_pte);
#else
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
#endif
}

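/*
 * The i915_ggtt_* routines below cover gen5 and older GMCH hardware,
 * where the GGTT is driven through the intel-gtt/AGP layer rather than
 * by writing PTEs directly.
 */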
static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);

	return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!USES_GUC(ggtt->vm.i915))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture.  One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/* Reserve a mappable slot for our lockless error capture */
		ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
						  &ggtt->error_capture,
						  PAGE_SIZE, 0,
						  I915_COLOR_UNEVICTABLE,
						  0, ggtt->mappable_end,
						  DRM_MM_INSERT_LOW);
		if (ret)
			return ret;
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	u32 pte_flags;
	int ret;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND) {
		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;

		if (flags & I915_VMA_ALLOC) {
			ret = alias->vm.allocate_va_range(&alias->vm,
							  vma->node.start,
							  vma->size);
			if (ret)
				return ret;

			set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
		}

		GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
				     __i915_vma_flags(vma)));
		alias->vm.insert_entries(&alias->vm, vma,
					 cache_level, pte_flags);
	}

	if (flags & I915_VMA_GLOBAL_BIND)
		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	return 0;
}

static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
		struct i915_address_space *vm = vma->vm;

		vm->clear_range(vm, vma->node.start, vma->size);
	}

	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
		struct i915_address_space *vm =
			&i915_vm_to_ggtt(vma->vm)->alias->vm;

		vm->clear_range(vm, vma->node.start, vma->size);
	}
}

static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	return 0;

err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	atomic_set(&ggtt->vm.open, 0);

	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
	flush_workqueue(ggtt->vm.i915->wq);

	mutex_lock(&ggtt->vm.mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(__i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

#ifdef __NetBSD__
	if (ggtt->mappable_end)
		pmap_pv_untrack(ggtt->gmadr.start, ggtt->mappable_end);
#endif

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
#ifndef __NetBSD__
	struct pagevec *pvec;
#endif

	fini_aliasing_ppgtt(&i915->ggtt);

	ggtt_cleanup_hw(&i915->ggtt);

#ifndef __NetBSD__
	pvec = &i915->mm.wc_stash.pvec;
	if (pvec->nr) {
		set_pages_array_wb(pvec->pages, pvec->nr);
		__pagevec_release(pvec);
	}
#endif
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

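/*
 * Map the GGTT page table (the "GSM", which on modern gens occupies the
 * upper half of PCI BAR 0), set up the scratch page, and record the
 * scratch PTE encoding used when clearing ranges.
 */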
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
#ifdef __NetBSD__
    {
	int flags;
	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
		flags = 0;
	else
		flags = BUS_SPACE_MAP_PREFETCHABLE;
	ggtt->gsmt = i915->drm.pdev->pd_pa.pa_memt;
	/* XXX errno NetBSD->Linux */
	ret = -bus_space_map(ggtt->gsmt, phys_addr, size, flags, &ggtt->gsmh);
	if (ret) {
		DRM_ERROR("Failed to map the ggtt page table: %d\n", ret);
		return ret;
	}
	ggtt->gsmsz = size;
    }
#else
	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}
#endif

	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
#ifdef __NetBSD__
		KASSERT(ggtt->gsmsz == size);
		bus_space_unmap(ggtt->gsmt, ggtt->gsmh, ggtt->gsmsz);
		ggtt->gsmsz = 0;
#else
		iounmap(ggtt->gsm);
#endif
		return ret;
	}

	ggtt->vm.scratch[0].encode =
		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, 0);

	return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
	int ret;

	GEM_BUG_ON(vma->pages);

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

#ifdef __NetBSD__
	if (ggtt->gsmsz) {
		bus_space_unmap(ggtt->gsmt, ggtt->gsmh, ggtt->gsmsz);
		ggtt->gsmsz = 0;
	}
#else
	iounmap(ggtt->gsm);
#endif
	cleanup_scratch_page(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
					       pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	if (!IS_DGFX(i915)) {
		ggtt->gmadr = pci_resource(pdev, 2);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

#ifdef __NetBSD__
	__USE(err);
	ggtt->max_paddr = DMA_BIT_MASK(39);
#else
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
#endif

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
	if (intel_ggtt_update_needs_vtd_wa(i915) ||
	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
		if (ggtt->vm.clear_range != nop_clear_range)
			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
	}

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_pte_encode;

	setup_private_pat(ggtt->vm.gt->uncore);

	return ggtt_probe_common(ggtt, size);
}

static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	ggtt->gmadr = pci_resource(pdev, 2);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
		return -ENXIO;
	}

#ifdef __NetBSD__
	__USE(err);
	ggtt->max_paddr = DMA_BIT_MASK(40);
#else
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
#endif
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

#ifdef __NetBSD__
	/* Based on i915_drv.c, i915_driver_hw_probe.  */
	if (IS_GEN(i915, 2))
		ggtt->max_paddr = DMA_BIT_MASK(30);
	else if (IS_I965G(i915) || IS_I965GM(i915))
		ggtt->max_paddr = DMA_BIT_MASK(32);
	else
		ggtt->max_paddr = DMA_BIT_MASK(40);
#endif

	ggtt->do_idle_maps = needs_idle_maps(i915);
	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma    = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma  = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages   = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		dev_notice(i915->drm.dev,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
#ifdef __NetBSD__
	ggtt->vm.dmat = i915->drm.dmat;
#else
	ggtt->vm.dma = &i915->drm.pdev->dev;
#endif

	if (INTEL_GEN(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

#ifdef __NetBSD__
	ggtt->pgfl = x86_select_freelist(ggtt->max_paddr);
	ret = drm_limit_dma_space(&i915->drm, 0, ggtt->max_paddr);
	if (ret) {
		DRM_ERROR("Unable to limit DMA paddr allocations: %d\n", ret);
		i915_ggtt_driver_release(i915);
		return ret;
	}
#endif

	if ((ggtt->vm.total - 1) >> 32) {
		DRM_ERROR("We never expected a Global GTT with more than 32bits"
			  " of address space! Found %"PRId64"M!\n",
			  ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		DRM_ERROR("mappable aperture extends past end of GGTT,"
			  " aperture=%pa, total=%"PRIx64"\n",
			  &ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_DEBUG_DRIVER("GGTT size = %"PRIu64"M\n", ggtt->vm.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %"PRIu64"M\n", (u64)ggtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("DSM size = %"PRIu64"M\n",
			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		dev_info(i915->drm.dev, "VT-d active for gfx access\n");

	return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

	ggtt->invalidate = guc_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
	/* XXX Temporary pardon for error unload */
	if (ggtt->invalidate == gen8_ggtt_invalidate)
		return;

	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma;
	bool flush = false;
	int open;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	mutex_lock(&ggtt->vm.mutex);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
		WARN_ON(i915_vma_bind(vma,
				      obj ? obj->cache_level : 0,
				      PIN_GLOBAL, NULL));
		if (obj) { /* only used during resume => exclusive access */
			flush |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	atomic_set(&ggtt->vm.open, open);
	ggtt->invalidate(ggtt);

	mutex_unlock(&ggtt->vm.mutex);

	if (flush)
		wbinvd_on_all_cpus();
}

void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	ggtt_restore_mappings(ggtt);

	if (INTEL_GEN(i915) >= 8)
		setup_private_pat(ggtt->vm.gt->uncore);
}

#ifndef __NetBSD__

static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
			 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	    unsigned int width, unsigned int height,
	    unsigned int stride,
	    struct sg_table *st, struct scatterlist *sg)
{
	unsigned int row;

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */

			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += stride - width;
	}

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
		sg = remap_pages(obj, rem_info->plane[i].offset,
				 rem_info->plane[i].width, rem_info->plane[i].height,
				 rem_info->plane[i].stride, st, sg);
	}

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

#endif	/* __NetBSD__ */

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
#ifdef __NetBSD__
	struct sg_table *st = NULL;
	int ret = -ENOMEM;

	KASSERTMSG(view->partial.offset <= obj->base.size >> PAGE_SHIFT,
	    "obj=%p size=0x%zx; view offset=0x%zx size=0x%zx",
	    obj,
	    (size_t)obj->base.size >> PAGE_SHIFT,
	    (size_t)view->partial.offset,
	    (size_t)view->partial.size);
	KASSERTMSG((view->partial.size <=
		(obj->base.size >> PAGE_SHIFT) - view->partial.offset),
	    "obj=%p size=0x%zx; view offset=0x%zx size=0x%zx",
	    obj,
	    (size_t)obj->base.size >> PAGE_SHIFT,
	    (size_t)view->partial.offset,
	    (size_t)view->partial.size);
	KASSERTMSG(view->partial.size <= INT_MAX, "view size=0x%zx",
	    (size_t)view->partial.size);

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		goto fail;
	ret = sg_alloc_table(st, view->partial.size, GFP_KERNEL);
	if (ret) {
		kfree(st);
		st = NULL;
		goto fail;
	}

	/* XXX errno NetBSD->Linux */
	if (obj->mm.pages->sgl->sg_dmamap) { /* XXX KASSERT?  */
		ret = -bus_dmamap_create(obj->base.dev->dmat,
		    (bus_size_t)view->partial.size << PAGE_SHIFT,
		    view->partial.size, PAGE_SIZE, 0, BUS_DMA_NOWAIT,
		    &st->sgl->sg_dmamap);
		if (ret) {
			st->sgl->sg_dmamap = NULL;
			goto fail;
		}
		st->sgl->sg_dmat = obj->base.dev->dmat;
	}

	/*
	 * Copy over the pages.  The view's offset and size are in
	 * units of pages already.
	 */
	KASSERT(st->sgl->sg_npgs == view->partial.size);
	memcpy(st->sgl->sg_pgs,
	    obj->mm.pages->sgl->sg_pgs + view->partial.offset,
	    sizeof(st->sgl->sg_pgs[0]) * view->partial.size);

	/*
	 * Copy over the DMA addresses.  For simplicity, we don't do
	 * anything to compress contiguous pages into larger segments.
	 */
	if (obj->mm.pages->sgl->sg_dmamap) {
		bus_size_t offset = (bus_size_t)view->partial.offset
		    << PAGE_SHIFT;
		unsigned i, j, k;

		st->sgl->sg_dmamap->dm_nsegs = view->partial.size;
		for (i = j = 0; i < view->partial.size; j++) {
			KASSERT(j < obj->mm.pages->sgl->sg_dmamap->dm_nsegs);
			const bus_dma_segment_t *iseg =
			    &obj->mm.pages->sgl->sg_dmamap->dm_segs[j];

			KASSERT(iseg->ds_len % PAGE_SIZE == 0);

			/* Skip segments prior to the start offset.  */
			if (offset >= iseg->ds_len) {
				offset -= iseg->ds_len;
				continue;
			}
			for (k = 0;
			     (i < view->partial.size &&
				 k < iseg->ds_len >> PAGE_SHIFT);
			     k++) {
				KASSERT(i < view->partial.size);
				bus_dma_segment_t *oseg =
				    &st->sgl->sg_dmamap->dm_segs[i++];
				oseg->ds_addr = iseg->ds_addr + offset +
				    k*PAGE_SIZE;
				oseg->ds_len = PAGE_SIZE;
			}

			/*
			 * After the first segment which we possibly
			 * use only a suffix of, the remainder we will
			 * take from the beginning.
			 */
			offset = 0;
		}
	}

	/* Success!  */
	return st;

fail:	if (st) {
		sg_free_table(st);
		kfree(st);
	}
	return ERR_PTR(ret);
#else
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st) {
		ret = -ENOMEM;
		goto err_st_alloc;
	}

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(iter->length - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			i915_sg_trim(st); /* Drop any unused tail entries. */

			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
#endif	/* __NetBSD__ */
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	default:
		GEM_BUG_ON(vma->ggtt_view.type);
		/* fall through */
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
#ifdef __NetBSD__
		vma->pages = ERR_PTR(-ENODEV);
#else
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
#endif
		break;

	case I915_GGTT_VIEW_REMAPPED:
#ifdef __NetBSD__
		vma->pages = ERR_PTR(-ENODEV);
#else
		vma->pages =
			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
#endif
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;
	}

	ret = 0;
	if (IS_ERR(vma->pages)) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}
	return ret;
}