1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2020 Intel Corporation 4 */ 5 6 #include <asm/set_memory.h> 7 #include <asm/smp.h> 8 #include <linux/types.h> 9 #include <linux/stop_machine.h> 10 11 #include <drm/drm_managed.h> 12 #include <drm/i915_drm.h> 13 #include <drm/intel-gtt.h> 14 15 #include "display/intel_display.h" 16 #include "gem/i915_gem_lmem.h" 17 18 #include "intel_context.h" 19 #include "intel_ggtt_gmch.h" 20 #include "intel_gpu_commands.h" 21 #include "intel_gt.h" 22 #include "intel_gt_regs.h" 23 #include "intel_pci_config.h" 24 #include "intel_ring.h" 25 #include "i915_drv.h" 26 #include "i915_pci.h" 27 #include "i915_reg.h" 28 #include "i915_request.h" 29 #include "i915_scatterlist.h" 30 #include "i915_utils.h" 31 #include "i915_vgpu.h" 32 33 #include "intel_gtt.h" 34 #include "gen8_ppgtt.h" 35 #include "intel_engine_pm.h" 36 37 static void i915_ggtt_color_adjust(const struct drm_mm_node *node, 38 unsigned long color, 39 u64 *start, 40 u64 *end) 41 { 42 if (i915_node_color_differs(node, color)) 43 *start += I915_GTT_PAGE_SIZE; 44 45 /* 46 * Also leave a space between the unallocated reserved node after the 47 * GTT and any objects within the GTT, i.e. we use the color adjustment 48 * to insert a guard page to prevent prefetches crossing over the 49 * GTT boundary. 50 */ 51 node = list_next_entry(node, node_list); 52 if (node->color != color) 53 *end -= I915_GTT_PAGE_SIZE; 54 } 55 56 static int ggtt_init_hw(struct i915_ggtt *ggtt) 57 { 58 struct drm_i915_private *i915 = ggtt->vm.i915; 59 60 i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); 61 62 ggtt->vm.is_ggtt = true; 63 64 /* Only VLV supports read-only GGTT mappings */ 65 ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); 66 67 if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) 68 ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; 69 70 if (ggtt->mappable_end) { 71 #ifdef __linux__ 72 if (!io_mapping_init_wc(&ggtt->iomap, 73 ggtt->gmadr.start, 74 ggtt->mappable_end)) { 75 ggtt->vm.cleanup(&ggtt->vm); 76 return -EIO; 77 } 78 79 ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, 80 ggtt->mappable_end); 81 #else 82 bus_space_handle_t bsh; 83 int i; 84 85 /* XXX would be a lot nicer to get agp info before now */ 86 uvm_page_physload(atop(ggtt->gmadr.start), 87 atop(ggtt->gmadr.start + ggtt->mappable_end), 88 atop(ggtt->gmadr.start), 89 atop(ggtt->gmadr.start + ggtt->mappable_end), 90 PHYSLOAD_DEVICE); 91 /* array of vm pages that physload introduced. */ 92 i915->pgs = PHYS_TO_VM_PAGE(ggtt->gmadr.start); 93 KASSERT(i915->pgs != NULL); 94 /* 95 * XXX mark all pages write combining so user mmaps get the 96 * right bits. We really need a proper MI api for doing this, 97 * but for now this allows us to use PAT where available. 98 */ 99 for (i = 0; i < atop(ggtt->mappable_end); i++) 100 atomic_setbits_int(&(i915->pgs[i].pg_flags), 101 PG_PMAP_WC); 102 if (bus_space_map(i915->bst, ggtt->gmadr.start, 103 ggtt->mappable_end, 104 BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE, &bsh)) 105 panic("can't map aperture"); 106 ggtt->iomap.base = ggtt->gmadr.start; 107 ggtt->iomap.size = ggtt->mappable_end; 108 ggtt->iomap.iomem = bus_space_vaddr(i915->bst, bsh); 109 #endif 110 } 111 112 intel_ggtt_init_fences(ggtt); 113 114 return 0; 115 } 116 117 /** 118 * i915_ggtt_init_hw - Initialize GGTT hardware 119 * @i915: i915 device 120 */ 121 int i915_ggtt_init_hw(struct drm_i915_private *i915) 122 { 123 int ret; 124 125 /* 126 * Note that we use page colouring to enforce a guard page at the 127 * end of the address space. 
	 * This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	return 0;
}

/**
 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 * @vm: The VM to suspend the mappings for
 *
 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 */
void i915_ggtt_suspend_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;
	int save_skip_rewrite;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

retry:
	i915_gem_drain_freed_objects(vm->i915);

	mutex_lock(&vm->mutex);

	/*
	 * Skip rewriting PTE on VMA unbind.
	 * FIXME: Use an argument to i915_vma_unbind() instead?
	 */
	save_skip_rewrite = vm->skip_pte_rewrite;
	vm->skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
			/*
			 * No dead objects should appear here, GPU should be
			 * completely idle, and userspace suspended
			 */
			i915_gem_object_get(obj);

			mutex_unlock(&vm->mutex);

			i915_gem_object_lock(obj, NULL);
			GEM_WARN_ON(i915_vma_unbind(vma));
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			vm->skip_pte_rewrite = save_skip_rewrite;
			goto retry;
		}

		if (!i915_vma_is_pinned(vma)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
		}

		i915_gem_object_unlock(obj);
	}

	vm->clear_range(vm, 0, vm->total);

	vm->skip_pte_rewrite = save_skip_rewrite;

	mutex_unlock(&vm->mutex);
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;

	i915_ggtt_suspend_vm(&ggtt->vm);
	ggtt->invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
{
	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
237 */ 238 if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) 239 return true; 240 241 return false; 242 } 243 244 static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) 245 { 246 struct intel_uncore *uncore = ggtt->vm.gt->uncore; 247 248 /* 249 * Note that as an uncached mmio write, this will flush the 250 * WCB of the writes into the GGTT before it triggers the invalidate. 251 * 252 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). 253 */ 254 if (needs_wc_ggtt_mapping(ggtt->vm.i915)) 255 intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, 256 GFX_FLSH_CNTL_EN); 257 } 258 259 static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) 260 { 261 struct drm_i915_private *i915 = ggtt->vm.i915; 262 263 gen8_ggtt_invalidate(ggtt); 264 265 if (GRAPHICS_VER(i915) >= 12) { 266 struct intel_gt *gt; 267 268 list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) 269 intel_uncore_write_fw(gt->uncore, 270 GEN12_GUC_TLB_INV_CR, 271 GEN12_GUC_TLB_INV_CR_INVALIDATE); 272 } else { 273 intel_uncore_write_fw(ggtt->vm.gt->uncore, 274 GEN8_GTCR, GEN8_GTCR_INVALIDATE); 275 } 276 } 277 278 static u64 mtl_ggtt_pte_encode(dma_addr_t addr, 279 unsigned int pat_index, 280 u32 flags) 281 { 282 gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; 283 284 WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK); 285 286 if (flags & PTE_LM) 287 pte |= GEN12_GGTT_PTE_LM; 288 289 if (pat_index & BIT(0)) 290 pte |= MTL_GGTT_PTE_PAT0; 291 292 if (pat_index & BIT(1)) 293 pte |= MTL_GGTT_PTE_PAT1; 294 295 return pte; 296 } 297 298 u64 gen8_ggtt_pte_encode(dma_addr_t addr, 299 unsigned int pat_index, 300 u32 flags) 301 { 302 gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; 303 304 if (flags & PTE_LM) 305 pte |= GEN12_GGTT_PTE_LM; 306 307 return pte; 308 } 309 310 static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) 311 { 312 struct intel_gt *gt = ggtt->vm.gt; 313 314 return intel_gt_is_bind_context_ready(gt); 315 } 316 317 static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) 318 { 319 struct intel_context *ce; 320 struct intel_gt *gt = ggtt->vm.gt; 321 322 if (intel_gt_is_wedged(gt)) 323 return NULL; 324 325 ce = gt->engine[BCS0]->bind_context; 326 GEM_BUG_ON(!ce); 327 328 /* 329 * If the GT is not awake already at this stage then fallback 330 * to pci based GGTT update otherwise __intel_wakeref_get_first() 331 * would conflict with fs_reclaim trying to allocate memory while 332 * doing rpm_resume(). 333 */ 334 if (!intel_gt_pm_get_if_awake(gt)) 335 return NULL; 336 337 intel_engine_pm_get(ce->engine); 338 339 return ce; 340 } 341 342 static void gen8_ggtt_bind_put_ce(struct intel_context *ce) 343 { 344 intel_engine_pm_put(ce->engine); 345 intel_gt_pm_put(ce->engine->gt); 346 } 347 348 static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, 349 struct sg_table *pages, u32 num_entries, 350 const gen8_pte_t pte) 351 { 352 struct i915_sched_attr attr = {}; 353 struct intel_gt *gt = ggtt->vm.gt; 354 const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode; 355 struct sgt_iter iter; 356 struct i915_request *rq; 357 struct intel_context *ce; 358 u32 *cs; 359 360 if (!num_entries) 361 return true; 362 363 ce = gen8_ggtt_bind_get_ce(ggtt); 364 if (!ce) 365 return false; 366 367 if (pages) 368 iter = __sgt_iter(pages->sgl, true); 369 370 while (num_entries) { 371 int count = 0; 372 dma_addr_t addr; 373 /* 374 * MI_UPDATE_GTT can update 512 entries in a single command but 375 * that end up with engine reset, 511 works. 
376 */ 377 u32 n_ptes = min_t(u32, 511, num_entries); 378 379 if (mutex_lock_interruptible(&ce->timeline->mutex)) 380 goto put_ce; 381 382 intel_context_enter(ce); 383 rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC); 384 intel_context_exit(ce); 385 if (IS_ERR(rq)) { 386 GT_TRACE(gt, "Failed to get bind request\n"); 387 mutex_unlock(&ce->timeline->mutex); 388 goto put_ce; 389 } 390 391 cs = intel_ring_begin(rq, 2 * n_ptes + 2); 392 if (IS_ERR(cs)) { 393 GT_TRACE(gt, "Failed to ring space for GGTT bind\n"); 394 i915_request_set_error_once(rq, PTR_ERR(cs)); 395 /* once a request is created, it must be queued */ 396 goto queue_err_rq; 397 } 398 399 *cs++ = MI_UPDATE_GTT | (2 * n_ptes); 400 *cs++ = offset << 12; 401 402 if (pages) { 403 for_each_sgt_daddr_next(addr, iter) { 404 if (count == n_ptes) 405 break; 406 *cs++ = lower_32_bits(pte | addr); 407 *cs++ = upper_32_bits(pte | addr); 408 count++; 409 } 410 /* fill remaining with scratch pte, if any */ 411 if (count < n_ptes) { 412 memset64((u64 *)cs, scratch_pte, 413 n_ptes - count); 414 cs += (n_ptes - count) * 2; 415 } 416 } else { 417 memset64((u64 *)cs, pte, n_ptes); 418 cs += n_ptes * 2; 419 } 420 421 intel_ring_advance(rq, cs); 422 queue_err_rq: 423 i915_request_get(rq); 424 __i915_request_commit(rq); 425 __i915_request_queue(rq, &attr); 426 427 mutex_unlock(&ce->timeline->mutex); 428 /* This will break if the request is complete or after engine reset */ 429 i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 430 if (rq->fence.error) 431 goto err_rq; 432 433 i915_request_put(rq); 434 435 num_entries -= n_ptes; 436 offset += n_ptes; 437 } 438 439 gen8_ggtt_bind_put_ce(ce); 440 return true; 441 442 err_rq: 443 i915_request_put(rq); 444 put_ce: 445 gen8_ggtt_bind_put_ce(ce); 446 return false; 447 } 448 449 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 450 { 451 writeq(pte, addr); 452 } 453 454 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 455 dma_addr_t addr, 456 u64 offset, 457 unsigned int pat_index, 458 u32 flags) 459 { 460 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 461 gen8_pte_t __iomem *pte = 462 (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; 463 464 gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags)); 465 466 ggtt->invalidate(ggtt); 467 } 468 469 static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm, 470 dma_addr_t addr, u64 offset, 471 unsigned int pat_index, u32 flags) 472 { 473 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 474 gen8_pte_t pte; 475 476 pte = ggtt->vm.pte_encode(addr, pat_index, flags); 477 if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) && 478 gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte)) 479 return ggtt->invalidate(ggtt); 480 481 gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags); 482 } 483 484 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 485 struct i915_vma_resource *vma_res, 486 unsigned int pat_index, 487 u32 flags) 488 { 489 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 490 const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags); 491 gen8_pte_t __iomem *gte; 492 gen8_pte_t __iomem *end; 493 struct sgt_iter iter; 494 dma_addr_t addr; 495 496 /* 497 * Note that we ignore PTE_READ_ONLY here. The caller must be careful 498 * not to allow the user to override access to a read only page. 
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					    struct i915_vma_resource *vma_res,
					    unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t pte_encode;
	u64 start, end;

	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	start = end;
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
		goto err;

	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	return true;

err:
	return false;
}

static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					  struct i915_vma_resource *vma_res,
					  unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
					 u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries =
max_entries; 601 602 if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry, 603 NULL, num_entries, scratch_pte)) 604 return ggtt->invalidate(ggtt); 605 606 gen8_ggtt_clear_range(vm, start, length); 607 } 608 609 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 610 dma_addr_t addr, 611 u64 offset, 612 unsigned int pat_index, 613 u32 flags) 614 { 615 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 616 gen6_pte_t __iomem *pte = 617 (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; 618 619 iowrite32(vm->pte_encode(addr, pat_index, flags), pte); 620 621 ggtt->invalidate(ggtt); 622 } 623 624 /* 625 * Binds an object into the global gtt with the specified cache level. 626 * The object will be accessible to the GPU via commands whose operands 627 * reference offsets within the global GTT as well as accessible by the GPU 628 * through the GMADR mapped BAR (i915->mm.gtt->gtt). 629 */ 630 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 631 struct i915_vma_resource *vma_res, 632 unsigned int pat_index, 633 u32 flags) 634 { 635 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 636 gen6_pte_t __iomem *gte; 637 gen6_pte_t __iomem *end; 638 struct sgt_iter iter; 639 dma_addr_t addr; 640 641 gte = (gen6_pte_t __iomem *)ggtt->gsm; 642 gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; 643 644 end = gte + vma_res->guard / I915_GTT_PAGE_SIZE; 645 while (gte < end) 646 iowrite32(vm->scratch[0]->encode, gte++); 647 end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; 648 for_each_sgt_daddr(addr, iter, vma_res->bi.pages) 649 iowrite32(vm->pte_encode(addr, pat_index, flags), gte++); 650 GEM_BUG_ON(gte > end); 651 652 /* Fill the allocated but "unused" space beyond the end of the buffer */ 653 while (gte < end) 654 iowrite32(vm->scratch[0]->encode, gte++); 655 656 /* 657 * We want to flush the TLBs only after we're certain all the PTE 658 * updates have finished. 659 */ 660 ggtt->invalidate(ggtt); 661 } 662 663 static void nop_clear_range(struct i915_address_space *vm, 664 u64 start, u64 length) 665 { 666 } 667 668 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) 669 { 670 /* 671 * Make sure the internal GAM fifo has been cleared of all GTT 672 * writes before exiting stop_machine(). This guarantees that 673 * any aperture accesses waiting to start in another process 674 * cannot back up behind the GTT writes causing a hang. 675 * The register can be any arbitrary GAM register. 
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	unsigned int pat_index;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  unsigned int pat_index,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, pat_index };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma_resource *vma_res;
	unsigned int pat_index;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma_resource *vma_res,
					     unsigned int pat_index,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma_res, pat_index, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

void intel_ggtt_bind_vma(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash,
			 struct i915_vma_resource *vma_res,
			 unsigned int pat_index,
			 u32 flags)
{
	u32 pte_flags;

	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
		return;

	vma_res->bound_flags |= flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

void intel_ggtt_unbind_vma(struct i915_address_space *vm,
			   struct i915_vma_resource *vma_res)
{
	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}

/*
 * Reserve the top of the GuC address space for firmware images. Addresses
 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 * which makes for a suitable range to hold GuC/HuC firmware images if the
 * size of the GGTT is 4G.
However, on a 32-bit platform the size of the GGTT 793 * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk 794 * of the same size anyway, which is far more than needed, to keep the logic 795 * in uc_fw_ggtt_offset() simple. 796 */ 797 #define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) 798 799 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) 800 { 801 u64 offset; 802 int ret; 803 804 if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) 805 return 0; 806 807 GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); 808 offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; 809 810 ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, 811 GUC_TOP_RESERVE_SIZE, offset, 812 I915_COLOR_UNEVICTABLE, PIN_NOEVICT); 813 if (ret) 814 drm_dbg(&ggtt->vm.i915->drm, 815 "Failed to reserve top of GGTT for GuC\n"); 816 817 return ret; 818 } 819 820 static void ggtt_release_guc_top(struct i915_ggtt *ggtt) 821 { 822 if (drm_mm_node_allocated(&ggtt->uc_fw)) 823 drm_mm_remove_node(&ggtt->uc_fw); 824 } 825 826 static void cleanup_init_ggtt(struct i915_ggtt *ggtt) 827 { 828 ggtt_release_guc_top(ggtt); 829 if (drm_mm_node_allocated(&ggtt->error_capture)) 830 drm_mm_remove_node(&ggtt->error_capture); 831 mutex_destroy(&ggtt->error_mutex); 832 } 833 834 static int init_ggtt(struct i915_ggtt *ggtt) 835 { 836 /* 837 * Let GEM Manage all of the aperture. 838 * 839 * However, leave one page at the end still bound to the scratch page. 840 * There are a number of places where the hardware apparently prefetches 841 * past the end of the object, and we've seen multiple hangs with the 842 * GPU head pointer stuck in a batchbuffer bound at the last page of the 843 * aperture. One page should be enough to keep any prefetching inside 844 * of the aperture. 845 */ 846 unsigned long hole_start, hole_end; 847 struct drm_mm_node *entry; 848 int ret; 849 850 /* 851 * GuC requires all resources that we're sharing with it to be placed in 852 * non-WOPCM memory. If GuC is not present or not in use we still need a 853 * small bias as ring wraparound at offset 0 sometimes hangs. No idea 854 * why. 855 */ 856 ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, 857 intel_wopcm_guc_size(&ggtt->vm.gt->wopcm)); 858 859 ret = intel_vgt_balloon(ggtt); 860 if (ret) 861 return ret; 862 863 rw_init(&ggtt->error_mutex, "ggtter"); 864 if (ggtt->mappable_end) { 865 /* 866 * Reserve a mappable slot for our lockless error capture. 867 * 868 * We strongly prefer taking address 0x0 in order to protect 869 * other critical buffers against accidental overwrites, 870 * as writing to address 0 is a very common mistake. 871 * 872 * Since 0 may already be in use by the system (e.g. the BIOS 873 * framebuffer), we let the reservation fail quietly and hope 874 * 0 remains reserved always. 875 * 876 * If we fail to reserve 0, and then fail to find any space 877 * for an error-capture, remain silent. We can afford not 878 * to reserve an error_capture node as we have fallback 879 * paths, and we trust that 0 will remain reserved. However, 880 * the only likely reason for failure to insert is a driver 881 * bug, which we expect to cause other failures... 882 * 883 * Since CPU can perform speculative reads on error capture 884 * (write-combining allows it) add scratch page after error 885 * capture to avoid DMAR errors. 
886 */ 887 ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE; 888 ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; 889 if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) 890 drm_mm_insert_node_in_range(&ggtt->vm.mm, 891 &ggtt->error_capture, 892 ggtt->error_capture.size, 0, 893 ggtt->error_capture.color, 894 0, ggtt->mappable_end, 895 DRM_MM_INSERT_LOW); 896 } 897 if (drm_mm_node_allocated(&ggtt->error_capture)) { 898 u64 start = ggtt->error_capture.start; 899 u64 size = ggtt->error_capture.size; 900 901 ggtt->vm.scratch_range(&ggtt->vm, start, size); 902 drm_dbg(&ggtt->vm.i915->drm, 903 "Reserved GGTT:[%llx, %llx] for use by error capture\n", 904 start, start + size); 905 } 906 907 /* 908 * The upper portion of the GuC address space has a sizeable hole 909 * (several MB) that is inaccessible by GuC. Reserve this range within 910 * GGTT as it can comfortably hold GuC/HuC firmware images. 911 */ 912 ret = ggtt_reserve_guc_top(ggtt); 913 if (ret) 914 goto err; 915 916 /* Clear any non-preallocated blocks */ 917 drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { 918 drm_dbg(&ggtt->vm.i915->drm, 919 "clearing unused GTT space: [%lx, %lx]\n", 920 hole_start, hole_end); 921 ggtt->vm.clear_range(&ggtt->vm, hole_start, 922 hole_end - hole_start); 923 } 924 925 /* And finally clear the reserved guard page */ 926 ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); 927 928 return 0; 929 930 err: 931 cleanup_init_ggtt(ggtt); 932 return ret; 933 } 934 935 static void aliasing_gtt_bind_vma(struct i915_address_space *vm, 936 struct i915_vm_pt_stash *stash, 937 struct i915_vma_resource *vma_res, 938 unsigned int pat_index, 939 u32 flags) 940 { 941 u32 pte_flags; 942 943 /* Currently applicable only to VLV */ 944 pte_flags = 0; 945 if (vma_res->bi.readonly) 946 pte_flags |= PTE_READ_ONLY; 947 948 if (flags & I915_VMA_LOCAL_BIND) 949 ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm, 950 stash, vma_res, pat_index, flags); 951 952 if (flags & I915_VMA_GLOBAL_BIND) 953 vm->insert_entries(vm, vma_res, pat_index, pte_flags); 954 955 vma_res->bound_flags |= flags; 956 } 957 958 static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, 959 struct i915_vma_resource *vma_res) 960 { 961 if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND) 962 vm->clear_range(vm, vma_res->start, vma_res->vma_size); 963 964 if (vma_res->bound_flags & I915_VMA_LOCAL_BIND) 965 ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res); 966 } 967 968 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) 969 { 970 struct i915_vm_pt_stash stash = {}; 971 struct i915_ppgtt *ppgtt; 972 int err; 973 974 ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0); 975 if (IS_ERR(ppgtt)) 976 return PTR_ERR(ppgtt); 977 978 if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { 979 err = -ENODEV; 980 goto err_ppgtt; 981 } 982 983 err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total); 984 if (err) 985 goto err_ppgtt; 986 987 i915_gem_object_lock(ppgtt->vm.scratch[0], NULL); 988 err = i915_vm_map_pt_stash(&ppgtt->vm, &stash); 989 i915_gem_object_unlock(ppgtt->vm.scratch[0]); 990 if (err) 991 goto err_stash; 992 993 /* 994 * Note we only pre-allocate as far as the end of the global 995 * GTT. On 48b / 4-level page-tables, the difference is very, 996 * very significant! We have to preallocate as GVT/vgpu does 997 * not like the page directory disappearing. 
998 */ 999 ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); 1000 1001 ggtt->alias = ppgtt; 1002 ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; 1003 1004 GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma); 1005 ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; 1006 1007 GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma); 1008 ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; 1009 1010 i915_vm_free_pt_stash(&ppgtt->vm, &stash); 1011 return 0; 1012 1013 err_stash: 1014 i915_vm_free_pt_stash(&ppgtt->vm, &stash); 1015 err_ppgtt: 1016 i915_vm_put(&ppgtt->vm); 1017 return err; 1018 } 1019 1020 static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) 1021 { 1022 struct i915_ppgtt *ppgtt; 1023 1024 ppgtt = fetch_and_zero(&ggtt->alias); 1025 if (!ppgtt) 1026 return; 1027 1028 i915_vm_put(&ppgtt->vm); 1029 1030 ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; 1031 ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; 1032 } 1033 1034 int i915_init_ggtt(struct drm_i915_private *i915) 1035 { 1036 int ret; 1037 1038 ret = init_ggtt(to_gt(i915)->ggtt); 1039 if (ret) 1040 return ret; 1041 1042 if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { 1043 ret = init_aliasing_ppgtt(to_gt(i915)->ggtt); 1044 if (ret) 1045 cleanup_init_ggtt(to_gt(i915)->ggtt); 1046 } 1047 1048 return 0; 1049 } 1050 1051 static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) 1052 { 1053 struct i915_vma *vma, *vn; 1054 1055 flush_workqueue(ggtt->vm.i915->wq); 1056 i915_gem_drain_freed_objects(ggtt->vm.i915); 1057 1058 mutex_lock(&ggtt->vm.mutex); 1059 1060 ggtt->vm.skip_pte_rewrite = true; 1061 1062 list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { 1063 struct drm_i915_gem_object *obj = vma->obj; 1064 bool trylock; 1065 1066 trylock = i915_gem_object_trylock(obj, NULL); 1067 WARN_ON(!trylock); 1068 1069 WARN_ON(__i915_vma_unbind(vma)); 1070 if (trylock) 1071 i915_gem_object_unlock(obj); 1072 } 1073 1074 if (drm_mm_node_allocated(&ggtt->error_capture)) 1075 drm_mm_remove_node(&ggtt->error_capture); 1076 mutex_destroy(&ggtt->error_mutex); 1077 1078 ggtt_release_guc_top(ggtt); 1079 intel_vgt_deballoon(ggtt); 1080 1081 ggtt->vm.cleanup(&ggtt->vm); 1082 1083 mutex_unlock(&ggtt->vm.mutex); 1084 i915_address_space_fini(&ggtt->vm); 1085 1086 #ifdef notyet 1087 arch_phys_wc_del(ggtt->mtrr); 1088 1089 if (ggtt->iomap.size) 1090 io_mapping_fini(&ggtt->iomap); 1091 #endif 1092 } 1093 1094 /** 1095 * i915_ggtt_driver_release - Clean up GGTT hardware initialization 1096 * @i915: i915 device 1097 */ 1098 void i915_ggtt_driver_release(struct drm_i915_private *i915) 1099 { 1100 struct i915_ggtt *ggtt = to_gt(i915)->ggtt; 1101 1102 fini_aliasing_ppgtt(ggtt); 1103 1104 intel_ggtt_fini_fences(ggtt); 1105 ggtt_cleanup_hw(ggtt); 1106 } 1107 1108 /** 1109 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after 1110 * all free objects have been drained. 
1111 * @i915: i915 device 1112 */ 1113 void i915_ggtt_driver_late_release(struct drm_i915_private *i915) 1114 { 1115 struct i915_ggtt *ggtt = to_gt(i915)->ggtt; 1116 1117 GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1); 1118 dma_resv_fini(&ggtt->vm._resv); 1119 } 1120 1121 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 1122 { 1123 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 1124 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 1125 return snb_gmch_ctl << 20; 1126 } 1127 1128 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 1129 { 1130 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 1131 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 1132 if (bdw_gmch_ctl) 1133 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 1134 1135 #ifdef CONFIG_X86_32 1136 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ 1137 if (bdw_gmch_ctl > 4) 1138 bdw_gmch_ctl = 4; 1139 #endif 1140 1141 return bdw_gmch_ctl << 20; 1142 } 1143 1144 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 1145 { 1146 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 1147 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 1148 1149 if (gmch_ctrl) 1150 return 1 << (20 + gmch_ctrl); 1151 1152 return 0; 1153 } 1154 1155 static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) 1156 { 1157 /* 1158 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset 1159 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset 1160 */ 1161 GEM_BUG_ON(GRAPHICS_VER(i915) < 6); 1162 return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; 1163 } 1164 1165 static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) 1166 { 1167 return gen6_gttmmadr_size(i915) / 2; 1168 } 1169 1170 #ifdef __linux__ 1171 1172 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 1173 { 1174 struct drm_i915_private *i915 = ggtt->vm.i915; 1175 struct intel_uncore *uncore = ggtt->vm.gt->uncore; 1176 struct pci_dev *pdev = to_pci_dev(i915->drm.dev); 1177 phys_addr_t phys_addr; 1178 u32 pte_flags; 1179 int ret; 1180 1181 GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); 1182 1183 if (i915_direct_stolen_access(i915)) { 1184 drm_dbg(&i915->drm, "Using direct GSM access\n"); 1185 phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK; 1186 } else { 1187 phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); 1188 } 1189 1190 if (needs_wc_ggtt_mapping(i915)) 1191 ggtt->gsm = ioremap_wc(phys_addr, size); 1192 else 1193 ggtt->gsm = ioremap(phys_addr, size); 1194 1195 if (!ggtt->gsm) { 1196 drm_err(&i915->drm, "Failed to map the ggtt page table\n"); 1197 return -ENOMEM; 1198 } 1199 1200 kref_init(&ggtt->vm.resv_ref); 1201 ret = setup_scratch_page(&ggtt->vm); 1202 if (ret) { 1203 drm_err(&i915->drm, "Scratch setup failed\n"); 1204 /* iounmap will also get called at remove, but meh */ 1205 iounmap(ggtt->gsm); 1206 return ret; 1207 } 1208 1209 pte_flags = 0; 1210 if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) 1211 pte_flags |= PTE_LM; 1212 1213 ggtt->vm.scratch[0]->encode = 1214 ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), 1215 i915_gem_get_pat_index(i915, 1216 I915_CACHE_NONE), 1217 pte_flags); 1218 1219 return 0; 1220 } 1221 1222 #else 1223 1224 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 1225 { 1226 struct drm_i915_private *i915 = ggtt->vm.i915; 1227 struct intel_uncore *uncore = ggtt->vm.gt->uncore; 1228 struct pci_dev *pdev = i915->drm.pdev; 1229 phys_addr_t phys_addr; 1230 bus_addr_t addr; 1231 bus_size_t len; 1232 pcireg_t type; 1233 int flags; 1234 u32 pte_flags; 1235 int 
ret; 1236 1237 type = pci_mapreg_type(i915->pc, i915->tag, 0x10); 1238 ret = -pci_mapreg_info(i915->pc, i915->tag, 0x10, type, 1239 &addr, &len, NULL); 1240 if (ret) 1241 return ret; 1242 1243 GEM_WARN_ON(len != gen6_gttmmadr_size(i915)); 1244 1245 if (i915_direct_stolen_access(i915)) { 1246 drm_dbg(&i915->drm, "Using direct GSM access\n"); 1247 phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK; 1248 } else { 1249 phys_addr = addr + gen6_gttadr_offset(i915); 1250 } 1251 1252 if (needs_wc_ggtt_mapping(i915)) 1253 flags = BUS_SPACE_MAP_PREFETCHABLE; 1254 else 1255 flags = 0; 1256 1257 ret = -bus_space_map(i915->bst, phys_addr, size, 1258 flags | BUS_SPACE_MAP_LINEAR, &ggtt->gsm_bsh); 1259 if (ret) { 1260 drm_err(&i915->drm, "Failed to map the ggtt page table\n"); 1261 return ret; 1262 } 1263 ggtt->gsm = bus_space_vaddr(i915->bst, ggtt->gsm_bsh); 1264 ggtt->gsm_size = size; 1265 if (!ggtt->gsm) { 1266 drm_err(&i915->drm, "Failed to map the ggtt page table\n"); 1267 return -ENOMEM; 1268 } 1269 1270 kref_init(&ggtt->vm.resv_ref); 1271 ret = setup_scratch_page(&ggtt->vm); 1272 if (ret) { 1273 drm_err(&i915->drm, "Scratch setup failed\n"); 1274 /* iounmap will also get called at remove, but meh */ 1275 bus_space_unmap(i915->bst, ggtt->gsm_bsh, size); 1276 return ret; 1277 } 1278 1279 pte_flags = 0; 1280 if (i915_gem_object_is_lmem(ggtt->vm.scratch[0])) 1281 pte_flags |= PTE_LM; 1282 1283 ggtt->vm.scratch[0]->encode = 1284 ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), 1285 i915_gem_get_pat_index(i915, 1286 I915_CACHE_NONE), 1287 pte_flags); 1288 1289 return 0; 1290 } 1291 1292 #endif 1293 1294 static void gen6_gmch_remove(struct i915_address_space *vm) 1295 { 1296 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 1297 1298 #ifdef __linux__ 1299 iounmap(ggtt->gsm); 1300 #else 1301 bus_space_unmap(vm->i915->bst, ggtt->gsm_bsh, ggtt->gsm_size); 1302 #endif 1303 free_scratch(vm); 1304 } 1305 1306 #ifdef __linux__ 1307 static struct resource pci_resource(struct pci_dev *pdev, int bar) 1308 { 1309 return DEFINE_RES_MEM(pci_resource_start(pdev, bar), 1310 pci_resource_len(pdev, bar)); 1311 } 1312 #endif 1313 1314 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 1315 { 1316 struct drm_i915_private *i915 = ggtt->vm.i915; 1317 struct pci_dev *pdev = i915->drm.pdev; 1318 unsigned int size; 1319 u16 snb_gmch_ctl; 1320 1321 if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) { 1322 #ifdef __linux__ 1323 if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) 1324 return -ENXIO; 1325 1326 ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); 1327 ggtt->mappable_end = resource_size(&ggtt->gmadr); 1328 #else 1329 bus_addr_t base; 1330 bus_size_t sz; 1331 pcireg_t type; 1332 int err; 1333 1334 type = pci_mapreg_type(i915->pc, i915->tag, 0x18); 1335 err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type, 1336 &base, &sz, NULL); 1337 if (err) 1338 return err; 1339 ggtt->gmadr.start = base; 1340 ggtt->mappable_end = sz; 1341 #endif 1342 } 1343 1344 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 1345 if (IS_CHERRYVIEW(i915)) 1346 size = chv_get_total_gtt_size(snb_gmch_ctl); 1347 else 1348 size = gen8_get_total_gtt_size(snb_gmch_ctl); 1349 1350 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 1351 ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 1352 ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; 1353 1354 ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; 1355 ggtt->vm.cleanup = gen6_gmch_remove; 1356 ggtt->vm.insert_page = gen8_ggtt_insert_page; 1357 ggtt->vm.clear_range = 
nop_clear_range;
	ggtt->vm.scratch_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;

		/*
		 * Calling stop_machine() version of GGTT update function
		 * at error capture/reset path will raise lockdep warning.
		 * Allow calling gen8_ggtt_insert_* directly at reset path
		 * which is safe from parallel GGTT updates.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;

		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	if (i915_ggtt_require_binder(i915)) {
		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
		/*
		 * If the GPU is hung, we might bind VMAs for error capture.
		 * Fall back to CPU GGTT updates in that case.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
	}

	if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
		ggtt->invalidate = guc_ggtt_invalidate;
	else
		ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
	else
		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

/*
 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
 * so the switch-case statements in these PTE encode functions are still valid.
 * See translation table LEGACY_CACHELEVEL.
1414 */ 1415 static u64 snb_pte_encode(dma_addr_t addr, 1416 unsigned int pat_index, 1417 u32 flags) 1418 { 1419 gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; 1420 1421 switch (pat_index) { 1422 case I915_CACHE_L3_LLC: 1423 case I915_CACHE_LLC: 1424 pte |= GEN6_PTE_CACHE_LLC; 1425 break; 1426 case I915_CACHE_NONE: 1427 pte |= GEN6_PTE_UNCACHED; 1428 break; 1429 default: 1430 MISSING_CASE(pat_index); 1431 } 1432 1433 return pte; 1434 } 1435 1436 static u64 ivb_pte_encode(dma_addr_t addr, 1437 unsigned int pat_index, 1438 u32 flags) 1439 { 1440 gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; 1441 1442 switch (pat_index) { 1443 case I915_CACHE_L3_LLC: 1444 pte |= GEN7_PTE_CACHE_L3_LLC; 1445 break; 1446 case I915_CACHE_LLC: 1447 pte |= GEN6_PTE_CACHE_LLC; 1448 break; 1449 case I915_CACHE_NONE: 1450 pte |= GEN6_PTE_UNCACHED; 1451 break; 1452 default: 1453 MISSING_CASE(pat_index); 1454 } 1455 1456 return pte; 1457 } 1458 1459 static u64 byt_pte_encode(dma_addr_t addr, 1460 unsigned int pat_index, 1461 u32 flags) 1462 { 1463 gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; 1464 1465 if (!(flags & PTE_READ_ONLY)) 1466 pte |= BYT_PTE_WRITEABLE; 1467 1468 if (pat_index != I915_CACHE_NONE) 1469 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 1470 1471 return pte; 1472 } 1473 1474 static u64 hsw_pte_encode(dma_addr_t addr, 1475 unsigned int pat_index, 1476 u32 flags) 1477 { 1478 gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; 1479 1480 if (pat_index != I915_CACHE_NONE) 1481 pte |= HSW_WB_LLC_AGE3; 1482 1483 return pte; 1484 } 1485 1486 static u64 iris_pte_encode(dma_addr_t addr, 1487 unsigned int pat_index, 1488 u32 flags) 1489 { 1490 gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; 1491 1492 switch (pat_index) { 1493 case I915_CACHE_NONE: 1494 break; 1495 case I915_CACHE_WT: 1496 pte |= HSW_WT_ELLC_LLC_AGE3; 1497 break; 1498 default: 1499 pte |= HSW_WB_ELLC_LLC_AGE3; 1500 break; 1501 } 1502 1503 return pte; 1504 } 1505 1506 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 1507 { 1508 struct drm_i915_private *i915 = ggtt->vm.i915; 1509 struct pci_dev *pdev = i915->drm.pdev; 1510 unsigned int size; 1511 u16 snb_gmch_ctl; 1512 1513 #ifdef __linux__ 1514 if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) 1515 return -ENXIO; 1516 1517 ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); 1518 ggtt->mappable_end = resource_size(&ggtt->gmadr); 1519 #else 1520 bus_addr_t base; 1521 bus_size_t sz; 1522 pcireg_t type; 1523 int err; 1524 1525 type = pci_mapreg_type(i915->pc, i915->tag, 0x18); 1526 err = -pci_mapreg_info(i915->pc, i915->tag, 0x18, type, 1527 &base, &sz, NULL); 1528 if (err) 1529 return err; 1530 ggtt->gmadr.start = base; 1531 ggtt->mappable_end = sz; 1532 #endif 1533 1534 /* 1535 * 64/512MB is the current min/max we actually know of, but this is 1536 * just a coarse sanity check. 
1537 */ 1538 if (ggtt->mappable_end < (64 << 20) || 1539 ggtt->mappable_end > (512 << 20)) { 1540 drm_err(&i915->drm, "Unknown GMADR size (%pa)\n", 1541 &ggtt->mappable_end); 1542 return -ENXIO; 1543 } 1544 1545 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 1546 1547 size = gen6_get_total_gtt_size(snb_gmch_ctl); 1548 ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; 1549 1550 ggtt->vm.alloc_pt_dma = alloc_pt_dma; 1551 ggtt->vm.alloc_scratch_dma = alloc_pt_dma; 1552 1553 ggtt->vm.clear_range = nop_clear_range; 1554 if (!HAS_FULL_PPGTT(i915)) 1555 ggtt->vm.clear_range = gen6_ggtt_clear_range; 1556 ggtt->vm.scratch_range = gen6_ggtt_clear_range; 1557 ggtt->vm.insert_page = gen6_ggtt_insert_page; 1558 ggtt->vm.insert_entries = gen6_ggtt_insert_entries; 1559 ggtt->vm.cleanup = gen6_gmch_remove; 1560 1561 ggtt->invalidate = gen6_ggtt_invalidate; 1562 1563 if (HAS_EDRAM(i915)) 1564 ggtt->vm.pte_encode = iris_pte_encode; 1565 else if (IS_HASWELL(i915)) 1566 ggtt->vm.pte_encode = hsw_pte_encode; 1567 else if (IS_VALLEYVIEW(i915)) 1568 ggtt->vm.pte_encode = byt_pte_encode; 1569 else if (GRAPHICS_VER(i915) >= 7) 1570 ggtt->vm.pte_encode = ivb_pte_encode; 1571 else 1572 ggtt->vm.pte_encode = snb_pte_encode; 1573 1574 ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; 1575 ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; 1576 1577 return ggtt_probe_common(ggtt, size); 1578 } 1579 1580 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) 1581 { 1582 struct drm_i915_private *i915 = gt->i915; 1583 int ret; 1584 1585 ggtt->vm.gt = gt; 1586 ggtt->vm.i915 = i915; 1587 #ifdef notyet 1588 ggtt->vm.dma = i915->drm.dev; 1589 #endif 1590 dma_resv_init(&ggtt->vm._resv); 1591 1592 if (GRAPHICS_VER(i915) >= 8) 1593 ret = gen8_gmch_probe(ggtt); 1594 else if (GRAPHICS_VER(i915) >= 6) 1595 ret = gen6_gmch_probe(ggtt); 1596 else 1597 ret = intel_ggtt_gmch_probe(ggtt); 1598 1599 if (ret) { 1600 dma_resv_fini(&ggtt->vm._resv); 1601 return ret; 1602 } 1603 1604 if ((ggtt->vm.total - 1) >> 32) { 1605 drm_err(&i915->drm, 1606 "We never expected a Global GTT with more than 32bits" 1607 " of address space! Found %lldM!\n", 1608 ggtt->vm.total >> 20); 1609 ggtt->vm.total = 1ULL << 32; 1610 ggtt->mappable_end = 1611 min_t(u64, ggtt->mappable_end, ggtt->vm.total); 1612 } 1613 1614 if (ggtt->mappable_end > ggtt->vm.total) { 1615 drm_err(&i915->drm, 1616 "mappable aperture extends past end of GGTT," 1617 " aperture=%pa, total=%llx\n", 1618 &ggtt->mappable_end, ggtt->vm.total); 1619 ggtt->mappable_end = ggtt->vm.total; 1620 } 1621 1622 /* GMADR is the PCI mmio aperture into the global GTT. 
	 */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	int ret, i;

	for_each_gt(gt, i915, i) {
		ret = intel_gt_assign_ggtt(gt);
		if (ret)
			return ret;
	}

	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
	if (ret)
		return ret;

	if (i915_vtd_active(i915))
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt;

	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);

	return ggtt;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6)
		return intel_ggtt_gmch_enable_hw(i915);

	return 0;
}

/**
 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
 * @vm: The VM to restore the mappings for
 *
 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 *
 * Returns %true if restoring the mapping for any object that was in a write
 * domain before suspend.
 */
bool i915_ggtt_resume_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma;
	bool write_domain_objs = false;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

	/* First fill our portion of the GTT with scratch pages */
	vm->clear_range(vm, 0, vm->total);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);

		/*
		 * Clear the bound flags of the vma resource to allow
		 * ptes to be repopulated.
		 */
		vma->resource->bound_flags = 0;
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);

		if (obj) { /* only used during resume => exclusive access */
			write_domain_objs |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	return write_domain_objs;
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;
	bool flush;

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);

	flush = i915_ggtt_resume_vm(&ggtt->vm);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);

	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	intel_ggtt_restore_fences(ggtt);
}