1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <drm/drm_managed.h> 7 #include <drm/intel-gtt.h> 8 9 #include "gem/i915_gem_internal.h" 10 #include "gem/i915_gem_lmem.h" 11 #include "pxp/intel_pxp.h" 12 13 #include "i915_drv.h" 14 #include "i915_perf_oa_regs.h" 15 #include "intel_context.h" 16 #include "intel_engine_pm.h" 17 #include "intel_engine_regs.h" 18 #include "intel_ggtt_gmch.h" 19 #include "intel_gt.h" 20 #include "intel_gt_buffer_pool.h" 21 #include "intel_gt_clock_utils.h" 22 #include "intel_gt_debugfs.h" 23 #include "intel_gt_mcr.h" 24 #include "intel_gt_pm.h" 25 #include "intel_gt_regs.h" 26 #include "intel_gt_requests.h" 27 #include "intel_migrate.h" 28 #include "intel_mocs.h" 29 #include "intel_pci_config.h" 30 #include "intel_pm.h" 31 #include "intel_rc6.h" 32 #include "intel_renderstate.h" 33 #include "intel_rps.h" 34 #include "intel_sa_media.h" 35 #include "intel_gt_sysfs.h" 36 #include "intel_uncore.h" 37 #include "shmem_utils.h" 38 39 void intel_gt_common_init_early(struct intel_gt *gt) 40 { 41 mtx_init(gt->irq_lock, IPL_TTY); 42 43 INIT_LIST_HEAD(>->closed_vma); 44 mtx_init(>->closed_lock, IPL_TTY); 45 46 init_llist_head(>->watchdog.list); 47 INIT_WORK(>->watchdog.work, intel_gt_watchdog_work); 48 49 intel_gt_init_buffer_pool(gt); 50 intel_gt_init_reset(gt); 51 intel_gt_init_requests(gt); 52 intel_gt_init_timelines(gt); 53 rw_init(>->tlb.invalidate_lock, "itlbinv"); 54 seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); 55 intel_gt_pm_init_early(gt); 56 57 intel_uc_init_early(>->uc); 58 intel_rps_init_early(>->rps); 59 } 60 61 /* Preliminary initialization of Tile 0 */ 62 int intel_root_gt_init_early(struct drm_i915_private *i915) 63 { 64 struct intel_gt *gt = to_gt(i915); 65 66 gt->i915 = i915; 67 gt->uncore = &i915->uncore; 68 gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL); 69 if (!gt->irq_lock) 70 return -ENOMEM; 71 72 intel_gt_common_init_early(gt); 73 74 return 0; 75 } 76 77 static int intel_gt_probe_lmem(struct intel_gt *gt) 78 { 79 struct drm_i915_private *i915 = gt->i915; 80 unsigned int instance = gt->info.id; 81 int id = INTEL_REGION_LMEM_0 + instance; 82 struct intel_memory_region *mem; 83 int err; 84 85 mem = intel_gt_setup_lmem(gt); 86 if (IS_ERR(mem)) { 87 err = PTR_ERR(mem); 88 if (err == -ENODEV) 89 return 0; 90 91 drm_err(&i915->drm, 92 "Failed to setup region(%d) type=%d\n", 93 err, INTEL_MEMORY_LOCAL); 94 return err; 95 } 96 97 mem->id = id; 98 mem->instance = instance; 99 100 intel_memory_region_set_name(mem, "local%u", mem->instance); 101 102 GEM_BUG_ON(!HAS_REGION(i915, id)); 103 GEM_BUG_ON(i915->mm.regions[id]); 104 i915->mm.regions[id] = mem; 105 106 return 0; 107 } 108 109 int intel_gt_assign_ggtt(struct intel_gt *gt) 110 { 111 gt->ggtt = drmm_kzalloc(>->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL); 112 113 return gt->ggtt ? 0 : -ENOMEM; 114 } 115 116 int intel_gt_init_mmio(struct intel_gt *gt) 117 { 118 intel_gt_init_clock_frequency(gt); 119 120 intel_uc_init_mmio(>->uc); 121 intel_sseu_info_init(gt); 122 intel_gt_mcr_init(gt); 123 124 return intel_engines_init_mmio(gt); 125 } 126 127 static void init_unused_ring(struct intel_gt *gt, u32 base) 128 { 129 struct intel_uncore *uncore = gt->uncore; 130 131 intel_uncore_write(uncore, RING_CTL(base), 0); 132 intel_uncore_write(uncore, RING_HEAD(base), 0); 133 intel_uncore_write(uncore, RING_TAIL(base), 0); 134 intel_uncore_write(uncore, RING_START(base), 0); 135 } 136 137 static void init_unused_rings(struct intel_gt *gt) 138 { 139 struct drm_i915_private *i915 = gt->i915; 140 141 if (IS_I830(i915)) { 142 init_unused_ring(gt, PRB1_BASE); 143 init_unused_ring(gt, SRB0_BASE); 144 init_unused_ring(gt, SRB1_BASE); 145 init_unused_ring(gt, SRB2_BASE); 146 init_unused_ring(gt, SRB3_BASE); 147 } else if (GRAPHICS_VER(i915) == 2) { 148 init_unused_ring(gt, SRB0_BASE); 149 init_unused_ring(gt, SRB1_BASE); 150 } else if (GRAPHICS_VER(i915) == 3) { 151 init_unused_ring(gt, PRB1_BASE); 152 init_unused_ring(gt, PRB2_BASE); 153 } 154 } 155 156 int intel_gt_init_hw(struct intel_gt *gt) 157 { 158 struct drm_i915_private *i915 = gt->i915; 159 struct intel_uncore *uncore = gt->uncore; 160 int ret; 161 162 gt->last_init_time = ktime_get(); 163 164 /* Double layer security blanket, see i915_gem_init() */ 165 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 166 167 if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9) 168 intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); 169 170 if (IS_HASWELL(i915)) 171 intel_uncore_write(uncore, 172 HSW_MI_PREDICATE_RESULT_2, 173 IS_HSW_GT3(i915) ? 174 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); 175 176 /* Apply the GT workarounds... */ 177 intel_gt_apply_workarounds(gt); 178 /* ...and determine whether they are sticking. */ 179 intel_gt_verify_workarounds(gt, "init"); 180 181 intel_gt_init_swizzling(gt); 182 183 /* 184 * At least 830 can leave some of the unused rings 185 * "active" (ie. head != tail) after resume which 186 * will prevent c3 entry. Makes sure all unused rings 187 * are totally idle. 188 */ 189 init_unused_rings(gt); 190 191 ret = i915_ppgtt_init_hw(gt); 192 if (ret) { 193 DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); 194 goto out; 195 } 196 197 /* We can't enable contexts until all firmware is loaded */ 198 ret = intel_uc_init_hw(>->uc); 199 if (ret) { 200 i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); 201 goto out; 202 } 203 204 intel_mocs_init(gt); 205 206 out: 207 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 208 return ret; 209 } 210 211 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) 212 { 213 intel_uncore_rmw(uncore, reg, 0, set); 214 } 215 216 static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) 217 { 218 intel_uncore_rmw(uncore, reg, clr, 0); 219 } 220 221 static void clear_register(struct intel_uncore *uncore, i915_reg_t reg) 222 { 223 intel_uncore_rmw(uncore, reg, 0, 0); 224 } 225 226 static void gen6_clear_engine_error_register(struct intel_engine_cs *engine) 227 { 228 GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0); 229 GEN6_RING_FAULT_REG_POSTING_READ(engine); 230 } 231 232 void 233 intel_gt_clear_error_registers(struct intel_gt *gt, 234 intel_engine_mask_t engine_mask) 235 { 236 struct drm_i915_private *i915 = gt->i915; 237 struct intel_uncore *uncore = gt->uncore; 238 u32 eir; 239 240 if (GRAPHICS_VER(i915) != 2) 241 clear_register(uncore, PGTBL_ER); 242 243 if (GRAPHICS_VER(i915) < 4) 244 clear_register(uncore, IPEIR(RENDER_RING_BASE)); 245 else 246 clear_register(uncore, IPEIR_I965); 247 248 clear_register(uncore, EIR); 249 eir = intel_uncore_read(uncore, EIR); 250 if (eir) { 251 /* 252 * some errors might have become stuck, 253 * mask them. 254 */ 255 DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir); 256 rmw_set(uncore, EMR, eir); 257 intel_uncore_write(uncore, GEN2_IIR, 258 I915_MASTER_ERROR_INTERRUPT); 259 } 260 261 if (GRAPHICS_VER(i915) >= 12) { 262 rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID); 263 intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); 264 } else if (GRAPHICS_VER(i915) >= 8) { 265 rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID); 266 intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG); 267 } else if (GRAPHICS_VER(i915) >= 6) { 268 struct intel_engine_cs *engine; 269 enum intel_engine_id id; 270 271 for_each_engine_masked(engine, gt, engine_mask, id) 272 gen6_clear_engine_error_register(engine); 273 } 274 } 275 276 static void gen6_check_faults(struct intel_gt *gt) 277 { 278 struct intel_engine_cs *engine; 279 enum intel_engine_id id; 280 u32 fault; 281 282 for_each_engine(engine, gt, id) { 283 fault = GEN6_RING_FAULT_REG_READ(engine); 284 if (fault & RING_FAULT_VALID) { 285 drm_dbg(&engine->i915->drm, "Unexpected fault\n" 286 "\tAddr: 0x%08lx\n" 287 "\tAddress space: %s\n" 288 "\tSource ID: %d\n" 289 "\tType: %d\n", 290 (unsigned long)(fault & LINUX_PAGE_MASK), 291 fault & RING_FAULT_GTTSEL_MASK ? 292 "GGTT" : "PPGTT", 293 RING_FAULT_SRCID(fault), 294 RING_FAULT_FAULT_TYPE(fault)); 295 } 296 } 297 } 298 299 static void gen8_check_faults(struct intel_gt *gt) 300 { 301 struct intel_uncore *uncore = gt->uncore; 302 i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg; 303 u32 fault; 304 305 if (GRAPHICS_VER(gt->i915) >= 12) { 306 fault_reg = GEN12_RING_FAULT_REG; 307 fault_data0_reg = GEN12_FAULT_TLB_DATA0; 308 fault_data1_reg = GEN12_FAULT_TLB_DATA1; 309 } else { 310 fault_reg = GEN8_RING_FAULT_REG; 311 fault_data0_reg = GEN8_FAULT_TLB_DATA0; 312 fault_data1_reg = GEN8_FAULT_TLB_DATA1; 313 } 314 315 fault = intel_uncore_read(uncore, fault_reg); 316 if (fault & RING_FAULT_VALID) { 317 u32 fault_data0, fault_data1; 318 u64 fault_addr; 319 320 fault_data0 = intel_uncore_read(uncore, fault_data0_reg); 321 fault_data1 = intel_uncore_read(uncore, fault_data1_reg); 322 323 fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | 324 ((u64)fault_data0 << 12); 325 326 drm_dbg(&uncore->i915->drm, "Unexpected fault\n" 327 "\tAddr: 0x%08x_%08x\n" 328 "\tAddress space: %s\n" 329 "\tEngine ID: %d\n" 330 "\tSource ID: %d\n" 331 "\tType: %d\n", 332 upper_32_bits(fault_addr), lower_32_bits(fault_addr), 333 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", 334 GEN8_RING_FAULT_ENGINE_ID(fault), 335 RING_FAULT_SRCID(fault), 336 RING_FAULT_FAULT_TYPE(fault)); 337 } 338 } 339 340 void intel_gt_check_and_clear_faults(struct intel_gt *gt) 341 { 342 struct drm_i915_private *i915 = gt->i915; 343 344 /* From GEN8 onwards we only have one 'All Engine Fault Register' */ 345 if (GRAPHICS_VER(i915) >= 8) 346 gen8_check_faults(gt); 347 else if (GRAPHICS_VER(i915) >= 6) 348 gen6_check_faults(gt); 349 else 350 return; 351 352 intel_gt_clear_error_registers(gt, ALL_ENGINES); 353 } 354 355 void intel_gt_flush_ggtt_writes(struct intel_gt *gt) 356 { 357 struct intel_uncore *uncore = gt->uncore; 358 intel_wakeref_t wakeref; 359 360 /* 361 * No actual flushing is required for the GTT write domain for reads 362 * from the GTT domain. Writes to it "immediately" go to main memory 363 * as far as we know, so there's no chipset flush. It also doesn't 364 * land in the GPU render cache. 365 * 366 * However, we do have to enforce the order so that all writes through 367 * the GTT land before any writes to the device, such as updates to 368 * the GATT itself. 369 * 370 * We also have to wait a bit for the writes to land from the GTT. 371 * An uncached read (i.e. mmio) seems to be ideal for the round-trip 372 * timing. This issue has only been observed when switching quickly 373 * between GTT writes and CPU reads from inside the kernel on recent hw, 374 * and it appears to only affect discrete GTT blocks (i.e. on LLC 375 * system agents we cannot reproduce this behaviour, until Cannonlake 376 * that was!). 377 */ 378 379 wmb(); 380 381 if (INTEL_INFO(gt->i915)->has_coherent_ggtt) 382 return; 383 384 intel_gt_chipset_flush(gt); 385 386 with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) { 387 unsigned long flags; 388 389 spin_lock_irqsave(&uncore->lock, flags); 390 intel_uncore_posting_read_fw(uncore, 391 RING_HEAD(RENDER_RING_BASE)); 392 spin_unlock_irqrestore(&uncore->lock, flags); 393 } 394 } 395 396 void intel_gt_chipset_flush(struct intel_gt *gt) 397 { 398 wmb(); 399 if (GRAPHICS_VER(gt->i915) < 6) 400 intel_ggtt_gmch_flush(); 401 } 402 403 void intel_gt_driver_register(struct intel_gt *gt) 404 { 405 intel_gsc_init(>->gsc, gt->i915); 406 407 intel_rps_driver_register(>->rps); 408 409 intel_gt_debugfs_register(gt); 410 intel_gt_sysfs_register(gt); 411 } 412 413 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) 414 { 415 struct drm_i915_private *i915 = gt->i915; 416 struct drm_i915_gem_object *obj; 417 struct i915_vma *vma; 418 int ret; 419 420 obj = i915_gem_object_create_lmem(i915, size, 421 I915_BO_ALLOC_VOLATILE | 422 I915_BO_ALLOC_GPU_ONLY); 423 if (IS_ERR(obj)) 424 obj = i915_gem_object_create_stolen(i915, size); 425 if (IS_ERR(obj)) 426 obj = i915_gem_object_create_internal(i915, size); 427 if (IS_ERR(obj)) { 428 drm_err(&i915->drm, "Failed to allocate scratch page\n"); 429 return PTR_ERR(obj); 430 } 431 432 vma = i915_vma_instance(obj, >->ggtt->vm, NULL); 433 if (IS_ERR(vma)) { 434 ret = PTR_ERR(vma); 435 goto err_unref; 436 } 437 438 ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); 439 if (ret) 440 goto err_unref; 441 442 gt->scratch = i915_vma_make_unshrinkable(vma); 443 444 return 0; 445 446 err_unref: 447 i915_gem_object_put(obj); 448 return ret; 449 } 450 451 static void intel_gt_fini_scratch(struct intel_gt *gt) 452 { 453 i915_vma_unpin_and_release(>->scratch, 0); 454 } 455 456 static struct i915_address_space *kernel_vm(struct intel_gt *gt) 457 { 458 if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING) 459 return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm; 460 else 461 return i915_vm_get(>->ggtt->vm); 462 } 463 464 static int __engines_record_defaults(struct intel_gt *gt) 465 { 466 struct i915_request *requests[I915_NUM_ENGINES] = {}; 467 struct intel_engine_cs *engine; 468 enum intel_engine_id id; 469 int err = 0; 470 471 /* 472 * As we reset the gpu during very early sanitisation, the current 473 * register state on the GPU should reflect its defaults values. 474 * We load a context onto the hw (with restore-inhibit), then switch 475 * over to a second context to save that default register state. We 476 * can then prime every new context with that state so they all start 477 * from the same default HW values. 478 */ 479 480 for_each_engine(engine, gt, id) { 481 struct intel_renderstate so; 482 struct intel_context *ce; 483 struct i915_request *rq; 484 485 /* We must be able to switch to something! */ 486 GEM_BUG_ON(!engine->kernel_context); 487 488 ce = intel_context_create(engine); 489 if (IS_ERR(ce)) { 490 err = PTR_ERR(ce); 491 goto out; 492 } 493 494 err = intel_renderstate_init(&so, ce); 495 if (err) 496 goto err; 497 498 rq = i915_request_create(ce); 499 if (IS_ERR(rq)) { 500 err = PTR_ERR(rq); 501 goto err_fini; 502 } 503 504 err = intel_engine_emit_ctx_wa(rq); 505 if (err) 506 goto err_rq; 507 508 err = intel_renderstate_emit(&so, rq); 509 if (err) 510 goto err_rq; 511 512 err_rq: 513 requests[id] = i915_request_get(rq); 514 i915_request_add(rq); 515 err_fini: 516 intel_renderstate_fini(&so, ce); 517 err: 518 if (err) { 519 intel_context_put(ce); 520 goto out; 521 } 522 } 523 524 /* Flush the default context image to memory, and enable powersaving. */ 525 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { 526 err = -EIO; 527 goto out; 528 } 529 530 for (id = 0; id < ARRAY_SIZE(requests); id++) { 531 struct i915_request *rq; 532 struct uvm_object *state; 533 534 rq = requests[id]; 535 if (!rq) 536 continue; 537 538 if (rq->fence.error) { 539 err = -EIO; 540 goto out; 541 } 542 543 GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags)); 544 if (!rq->context->state) 545 continue; 546 547 /* Keep a copy of the state's backing pages; free the obj */ 548 #ifdef __linux__ 549 state = shmem_create_from_object(rq->context->state->obj); 550 #else 551 state = uao_create_from_object(rq->context->state->obj); 552 #endif 553 if (IS_ERR(state)) { 554 err = PTR_ERR(state); 555 goto out; 556 } 557 rq->engine->default_state = state; 558 } 559 560 out: 561 /* 562 * If we have to abandon now, we expect the engines to be idle 563 * and ready to be torn-down. The quickest way we can accomplish 564 * this is by declaring ourselves wedged. 565 */ 566 if (err) 567 intel_gt_set_wedged(gt); 568 569 for (id = 0; id < ARRAY_SIZE(requests); id++) { 570 struct intel_context *ce; 571 struct i915_request *rq; 572 573 rq = requests[id]; 574 if (!rq) 575 continue; 576 577 ce = rq->context; 578 i915_request_put(rq); 579 intel_context_put(ce); 580 } 581 return err; 582 } 583 584 static int __engines_verify_workarounds(struct intel_gt *gt) 585 { 586 struct intel_engine_cs *engine; 587 enum intel_engine_id id; 588 int err = 0; 589 590 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 591 return 0; 592 593 for_each_engine(engine, gt, id) { 594 if (intel_engine_verify_workarounds(engine, "load")) 595 err = -EIO; 596 } 597 598 /* Flush and restore the kernel context for safety */ 599 if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) 600 err = -EIO; 601 602 return err; 603 } 604 605 static void __intel_gt_disable(struct intel_gt *gt) 606 { 607 intel_gt_set_wedged_on_fini(gt); 608 609 intel_gt_suspend_prepare(gt); 610 intel_gt_suspend_late(gt); 611 612 GEM_BUG_ON(intel_gt_pm_is_awake(gt)); 613 } 614 615 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) 616 { 617 long remaining_timeout; 618 619 /* If the device is asleep, we have no requests outstanding */ 620 if (!intel_gt_pm_is_awake(gt)) 621 return 0; 622 623 while ((timeout = intel_gt_retire_requests_timeout(gt, timeout, 624 &remaining_timeout)) > 0) { 625 cond_resched(); 626 if (signal_pending(current)) 627 return -EINTR; 628 } 629 630 if (timeout) 631 return timeout; 632 633 if (remaining_timeout < 0) 634 remaining_timeout = 0; 635 636 return intel_uc_wait_for_idle(>->uc, remaining_timeout); 637 } 638 639 int intel_gt_init(struct intel_gt *gt) 640 { 641 int err; 642 643 err = i915_inject_probe_error(gt->i915, -ENODEV); 644 if (err) 645 return err; 646 647 intel_gt_init_workarounds(gt); 648 649 /* 650 * This is just a security blanket to placate dragons. 651 * On some systems, we very sporadically observe that the first TLBs 652 * used by the CS may be stale, despite us poking the TLB reset. If 653 * we hold the forcewake during initialisation these problems 654 * just magically go away. 655 */ 656 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 657 658 err = intel_gt_init_scratch(gt, 659 GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K); 660 if (err) 661 goto out_fw; 662 663 intel_gt_pm_init(gt); 664 665 gt->vm = kernel_vm(gt); 666 if (!gt->vm) { 667 err = -ENOMEM; 668 goto err_pm; 669 } 670 671 intel_set_mocs_index(gt); 672 673 err = intel_engines_init(gt); 674 if (err) 675 goto err_engines; 676 677 err = intel_uc_init(>->uc); 678 if (err) 679 goto err_engines; 680 681 err = intel_gt_resume(gt); 682 if (err) 683 goto err_uc_init; 684 685 err = intel_gt_init_hwconfig(gt); 686 if (err) 687 drm_err(>->i915->drm, "Failed to retrieve hwconfig table: %pe\n", 688 ERR_PTR(err)); 689 690 err = __engines_record_defaults(gt); 691 if (err) 692 goto err_gt; 693 694 err = __engines_verify_workarounds(gt); 695 if (err) 696 goto err_gt; 697 698 err = i915_inject_probe_error(gt->i915, -EIO); 699 if (err) 700 goto err_gt; 701 702 intel_uc_init_late(>->uc); 703 704 intel_migrate_init(>->migrate, gt); 705 706 intel_pxp_init(>->pxp); 707 708 goto out_fw; 709 err_gt: 710 __intel_gt_disable(gt); 711 intel_uc_fini_hw(>->uc); 712 err_uc_init: 713 intel_uc_fini(>->uc); 714 err_engines: 715 intel_engines_release(gt); 716 i915_vm_put(fetch_and_zero(>->vm)); 717 err_pm: 718 intel_gt_pm_fini(gt); 719 intel_gt_fini_scratch(gt); 720 out_fw: 721 if (err) 722 intel_gt_set_wedged_on_init(gt); 723 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 724 return err; 725 } 726 727 void intel_gt_driver_remove(struct intel_gt *gt) 728 { 729 __intel_gt_disable(gt); 730 731 intel_migrate_fini(>->migrate); 732 intel_uc_driver_remove(>->uc); 733 734 intel_engines_release(gt); 735 736 intel_gt_flush_buffer_pool(gt); 737 } 738 739 void intel_gt_driver_unregister(struct intel_gt *gt) 740 { 741 intel_wakeref_t wakeref; 742 743 intel_gt_sysfs_unregister(gt); 744 intel_rps_driver_unregister(>->rps); 745 intel_gsc_fini(>->gsc); 746 747 intel_pxp_fini(>->pxp); 748 749 /* 750 * Upon unregistering the device to prevent any new users, cancel 751 * all in-flight requests so that we can quickly unbind the active 752 * resources. 753 */ 754 intel_gt_set_wedged_on_fini(gt); 755 756 /* Scrub all HW state upon release */ 757 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 758 __intel_gt_reset(gt, ALL_ENGINES); 759 } 760 761 void intel_gt_driver_release(struct intel_gt *gt) 762 { 763 struct i915_address_space *vm; 764 765 vm = fetch_and_zero(>->vm); 766 if (vm) /* FIXME being called twice on error paths :( */ 767 i915_vm_put(vm); 768 769 intel_wa_list_free(>->wa_list); 770 intel_gt_pm_fini(gt); 771 intel_gt_fini_scratch(gt); 772 intel_gt_fini_buffer_pool(gt); 773 intel_gt_fini_hwconfig(gt); 774 } 775 776 void intel_gt_driver_late_release_all(struct drm_i915_private *i915) 777 { 778 struct intel_gt *gt; 779 unsigned int id; 780 781 /* We need to wait for inflight RCU frees to release their grip */ 782 rcu_barrier(); 783 784 for_each_gt(gt, i915, id) { 785 intel_uc_driver_late_release(>->uc); 786 intel_gt_fini_requests(gt); 787 intel_gt_fini_reset(gt); 788 intel_gt_fini_timelines(gt); 789 mutex_destroy(>->tlb.invalidate_lock); 790 intel_engines_free(gt); 791 } 792 } 793 794 static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr) 795 { 796 int ret; 797 798 if (!gt_is_root(gt)) { 799 struct intel_uncore *uncore; 800 spinlock_t *irq_lock; 801 802 uncore = drmm_kzalloc(>->i915->drm, sizeof(*uncore), GFP_KERNEL); 803 if (!uncore) 804 return -ENOMEM; 805 806 irq_lock = drmm_kzalloc(>->i915->drm, sizeof(*irq_lock), GFP_KERNEL); 807 if (!irq_lock) 808 return -ENOMEM; 809 810 gt->uncore = uncore; 811 gt->irq_lock = irq_lock; 812 813 intel_gt_common_init_early(gt); 814 } 815 816 intel_uncore_init_early(gt->uncore, gt); 817 818 ret = intel_uncore_setup_mmio(gt->uncore, phys_addr); 819 if (ret) 820 return ret; 821 822 gt->phys_addr = phys_addr; 823 824 return 0; 825 } 826 827 #ifdef __linux__ 828 829 int intel_gt_probe_all(struct drm_i915_private *i915) 830 { 831 struct pci_dev *pdev = i915->drm.pdev; 832 struct intel_gt *gt = &i915->gt0; 833 const struct intel_gt_definition *gtdef; 834 phys_addr_t phys_addr; 835 unsigned int mmio_bar; 836 unsigned int i; 837 int ret; 838 839 mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; 840 phys_addr = pci_resource_start(pdev, mmio_bar); 841 842 /* 843 * We always have at least one primary GT on any device 844 * and it has been already initialized early during probe 845 * in i915_driver_probe() 846 */ 847 gt->i915 = i915; 848 gt->name = "Primary GT"; 849 gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; 850 851 drm_dbg(&i915->drm, "Setting up %s\n", gt->name); 852 ret = intel_gt_tile_setup(gt, phys_addr); 853 if (ret) 854 return ret; 855 856 i915->gt[0] = gt; 857 858 if (!HAS_EXTRA_GT_LIST(i915)) 859 return 0; 860 861 for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; 862 gtdef->name != NULL; 863 i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { 864 gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); 865 if (!gt) { 866 ret = -ENOMEM; 867 goto err; 868 } 869 870 gt->i915 = i915; 871 gt->name = gtdef->name; 872 gt->type = gtdef->type; 873 gt->info.engine_mask = gtdef->engine_mask; 874 gt->info.id = i; 875 876 drm_dbg(&i915->drm, "Setting up %s\n", gt->name); 877 if (GEM_WARN_ON(range_overflows_t(resource_size_t, 878 gtdef->mapping_base, 879 SZ_16M, 880 pci_resource_len(pdev, mmio_bar)))) { 881 ret = -ENODEV; 882 goto err; 883 } 884 885 switch (gtdef->type) { 886 case GT_TILE: 887 ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base); 888 break; 889 890 case GT_MEDIA: 891 ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base, 892 gtdef->gsi_offset); 893 break; 894 895 case GT_PRIMARY: 896 /* Primary GT should not appear in extra GT list */ 897 default: 898 MISSING_CASE(gtdef->type); 899 ret = -ENODEV; 900 } 901 902 if (ret) 903 goto err; 904 905 i915->gt[i] = gt; 906 } 907 908 return 0; 909 910 err: 911 i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); 912 intel_gt_release_all(i915); 913 914 return ret; 915 } 916 917 #else 918 919 int intel_gt_probe_all(struct drm_i915_private *i915) 920 { 921 struct pci_dev *pdev = i915->drm.pdev; 922 struct intel_gt *gt = &i915->gt0; 923 const struct intel_gt_definition *gtdef; 924 phys_addr_t phys_addr; 925 bus_size_t len; 926 pcireg_t type; 927 int flags; 928 unsigned int mmio_bar; 929 unsigned int i; 930 int ret; 931 932 mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR; 933 type = pci_mapreg_type(i915->pc, i915->tag, 0x10 + (mmio_bar * 4)); 934 ret = -pci_mapreg_info(i915->pc, i915->tag, 0x10 + (mmio_bar * 4), type, 935 &phys_addr, &len, NULL); 936 if (ret) 937 return ret; 938 939 /* 940 * We always have at least one primary GT on any device 941 * and it has been already initialized early during probe 942 * in i915_driver_probe() 943 */ 944 gt->i915 = i915; 945 gt->name = "Primary GT"; 946 gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask; 947 948 drm_dbg(&i915->drm, "Setting up %s\n", gt->name); 949 ret = intel_gt_tile_setup(gt, phys_addr); 950 if (ret) 951 return ret; 952 953 i915->gt[0] = gt; 954 955 if (!HAS_EXTRA_GT_LIST(i915)) 956 return 0; 957 958 for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]; 959 gtdef->name != NULL; 960 i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) { 961 gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); 962 if (!gt) { 963 ret = -ENOMEM; 964 goto err; 965 } 966 967 gt->i915 = i915; 968 gt->name = gtdef->name; 969 gt->type = gtdef->type; 970 gt->info.engine_mask = gtdef->engine_mask; 971 gt->info.id = i; 972 973 drm_dbg(&i915->drm, "Setting up %s\n", gt->name); 974 if (GEM_WARN_ON(range_overflows_t(resource_size_t, 975 gtdef->mapping_base, 976 SZ_16M, 977 len))) { 978 ret = -ENODEV; 979 goto err; 980 } 981 982 switch (gtdef->type) { 983 case GT_TILE: 984 ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base); 985 break; 986 987 case GT_MEDIA: 988 ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base, 989 gtdef->gsi_offset); 990 break; 991 992 case GT_PRIMARY: 993 /* Primary GT should not appear in extra GT list */ 994 default: 995 MISSING_CASE(gtdef->type); 996 ret = -ENODEV; 997 } 998 999 if (ret) 1000 goto err; 1001 1002 i915->gt[i] = gt; 1003 } 1004 1005 return 0; 1006 1007 err: 1008 i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret); 1009 intel_gt_release_all(i915); 1010 1011 return ret; 1012 } 1013 1014 #endif 1015 1016 int intel_gt_tiles_init(struct drm_i915_private *i915) 1017 { 1018 struct intel_gt *gt; 1019 unsigned int id; 1020 int ret; 1021 1022 for_each_gt(gt, i915, id) { 1023 ret = intel_gt_probe_lmem(gt); 1024 if (ret) 1025 return ret; 1026 } 1027 1028 return 0; 1029 } 1030 1031 void intel_gt_release_all(struct drm_i915_private *i915) 1032 { 1033 struct intel_gt *gt; 1034 unsigned int id; 1035 1036 for_each_gt(gt, i915, id) 1037 i915->gt[id] = NULL; 1038 } 1039 1040 void intel_gt_info_print(const struct intel_gt_info *info, 1041 struct drm_printer *p) 1042 { 1043 drm_printf(p, "available engines: %x\n", info->engine_mask); 1044 1045 intel_sseu_dump(&info->sseu, p); 1046 } 1047 1048 struct reg_and_bit { 1049 i915_reg_t reg; 1050 u32 bit; 1051 }; 1052 1053 static struct reg_and_bit 1054 get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, 1055 const i915_reg_t *regs, const unsigned int num) 1056 { 1057 const unsigned int class = engine->class; 1058 struct reg_and_bit rb = { }; 1059 1060 if (drm_WARN_ON_ONCE(&engine->i915->drm, 1061 class >= num || !regs[class].reg)) 1062 return rb; 1063 1064 rb.reg = regs[class]; 1065 if (gen8 && class == VIDEO_DECODE_CLASS) 1066 rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ 1067 else 1068 rb.bit = engine->instance; 1069 1070 rb.bit = BIT(rb.bit); 1071 1072 return rb; 1073 } 1074 1075 static void mmio_invalidate_full(struct intel_gt *gt) 1076 { 1077 static const i915_reg_t gen8_regs[] = { 1078 [RENDER_CLASS] = GEN8_RTCR, 1079 [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ 1080 [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, 1081 [COPY_ENGINE_CLASS] = GEN8_BTCR, 1082 }; 1083 static const i915_reg_t gen12_regs[] = { 1084 [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, 1085 [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, 1086 [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, 1087 [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, 1088 [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, 1089 }; 1090 struct drm_i915_private *i915 = gt->i915; 1091 struct intel_uncore *uncore = gt->uncore; 1092 struct intel_engine_cs *engine; 1093 intel_engine_mask_t awake, tmp; 1094 enum intel_engine_id id; 1095 const i915_reg_t *regs; 1096 unsigned int num = 0; 1097 1098 if (GRAPHICS_VER(i915) == 12) { 1099 regs = gen12_regs; 1100 num = ARRAY_SIZE(gen12_regs); 1101 } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { 1102 regs = gen8_regs; 1103 num = ARRAY_SIZE(gen8_regs); 1104 } else if (GRAPHICS_VER(i915) < 8) { 1105 return; 1106 } 1107 1108 if (drm_WARN_ONCE(&i915->drm, !num, 1109 "Platform does not implement TLB invalidation!")) 1110 return; 1111 1112 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1113 1114 spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ 1115 1116 awake = 0; 1117 for_each_engine(engine, gt, id) { 1118 struct reg_and_bit rb; 1119 1120 if (!intel_engine_pm_is_awake(engine)) 1121 continue; 1122 1123 rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); 1124 if (!i915_mmio_reg_offset(rb.reg)) 1125 continue; 1126 1127 if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || 1128 engine->class == VIDEO_ENHANCEMENT_CLASS || 1129 engine->class == COMPUTE_CLASS)) 1130 rb.bit = _MASKED_BIT_ENABLE(rb.bit); 1131 1132 intel_uncore_write_fw(uncore, rb.reg, rb.bit); 1133 awake |= engine->mask; 1134 } 1135 1136 GT_TRACE(gt, "invalidated engines %08x\n", awake); 1137 1138 /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ 1139 if (awake && 1140 (IS_TIGERLAKE(i915) || 1141 IS_DG1(i915) || 1142 IS_ROCKETLAKE(i915) || 1143 IS_ALDERLAKE_S(i915) || 1144 IS_ALDERLAKE_P(i915))) 1145 intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); 1146 1147 spin_unlock_irq(&uncore->lock); 1148 1149 for_each_engine_masked(engine, gt, awake, tmp) { 1150 struct reg_and_bit rb; 1151 1152 /* 1153 * HW architecture suggest typical invalidation time at 40us, 1154 * with pessimistic cases up to 100us and a recommendation to 1155 * cap at 1ms. We go a bit higher just in case. 1156 */ 1157 const unsigned int timeout_us = 100; 1158 const unsigned int timeout_ms = 4; 1159 1160 rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); 1161 if (__intel_wait_for_register_fw(uncore, 1162 rb.reg, rb.bit, 0, 1163 timeout_us, timeout_ms, 1164 NULL)) 1165 drm_err_ratelimited(>->i915->drm, 1166 "%s TLB invalidation did not complete in %ums!\n", 1167 engine->name, timeout_ms); 1168 } 1169 1170 /* 1171 * Use delayed put since a) we mostly expect a flurry of TLB 1172 * invalidations so it is good to avoid paying the forcewake cost and 1173 * b) it works around a bug in Icelake which cannot cope with too rapid 1174 * transitions. 1175 */ 1176 intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); 1177 } 1178 1179 static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) 1180 { 1181 u32 cur = intel_gt_tlb_seqno(gt); 1182 1183 /* Only skip if a *full* TLB invalidate barrier has passed */ 1184 return (s32)(cur - roundup2(seqno, 2)) > 0; 1185 } 1186 1187 void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) 1188 { 1189 intel_wakeref_t wakeref; 1190 1191 if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) 1192 return; 1193 1194 if (intel_gt_is_wedged(gt)) 1195 return; 1196 1197 if (tlb_seqno_passed(gt, seqno)) 1198 return; 1199 1200 with_intel_gt_pm_if_awake(gt, wakeref) { 1201 mutex_lock(>->tlb.invalidate_lock); 1202 if (tlb_seqno_passed(gt, seqno)) 1203 goto unlock; 1204 1205 mmio_invalidate_full(gt); 1206 1207 #ifdef notyet 1208 write_seqcount_invalidate(>->tlb.seqno); 1209 #else 1210 barrier(); 1211 gt->tlb.seqno.seq.sequence += 2; 1212 #endif 1213 unlock: 1214 mutex_unlock(>->tlb.invalidate_lock); 1215 } 1216 } 1217