// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include <linux/circ_buf.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
#include "i915_reg.h"
#include "i915_trace.h"

/**
 * DOC: GuC-based command submission
 *
 * The Scratch registers:
 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver
 * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
 * It then triggers an interrupt on the GuC via another register write (0xC4C8).
 * Firmware writes a success/fail code back to the action register after
 * processing the request. The kernel driver polls waiting for this update and
 * then proceeds.
 *
 * Command Transport buffers (CTBs):
 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
 * - G2H) are a message interface between the i915 and the GuC.
 *
 * Context registration:
 * Before a context can be submitted it must be registered with the GuC via a
 * H2G. A unique guc_id is associated with each context. The context is either
 * registered at request creation time (normal operation) or at submission time
 * (abnormal operation, e.g. after a reset).
 *
 * Context submission:
 * The i915 updates the LRC tail value in memory. The i915 must enable the
 * scheduling of the context within the GuC for the GuC to actually consider it.
 * Therefore, the first time a disabled context is submitted we use a schedule
 * enable H2G, while follow-up submissions are done via the context submit H2G,
 * which informs the GuC that a previously enabled context has new work
 * available.
 *
 * Context unpin:
 * To unpin a context a H2G is used to disable scheduling. When the
 * corresponding G2H returns indicating the scheduling disable operation has
 * completed it is safe to unpin the context. While a disable is in flight it
 * isn't safe to resubmit the context so a fence is used to stall all future
 * requests of that context until the G2H is returned. Because this interaction
 * with the GuC takes a non-zero amount of time we delay the disabling of
 * scheduling after the pin count goes to zero by a configurable period of time
 * (see SCHED_DISABLE_DELAY_MS). The intent is to give the user a window of
 * time to resubmit something on the context before doing this costly operation.
 * This delay is only done if the context isn't closed and the guc_id usage is
 * less than a threshold (see NUM_SCHED_DISABLE_GUC_IDS_THRESHOLD).
 *
 * Context deregistration:
 * Before a context can be destroyed or if we steal its guc_id we must
 * deregister the context with the GuC via H2G.
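 *
 * As a rough sketch only (using the CTB send helper defined later in this
 * file plus the usual GuC ABI action/length defines, which are assumed here
 * rather than shown), such a deregister H2G boils down to a two dword action
 * message:
 *
 *	u32 action[] = {
 *		INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
 *		guc_id,
 *	};
 *
 *	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
 *				      G2H_LEN_DW_DEREGISTER_CONTEXT, true);
 *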
 * If stealing the guc_id it isn't safe to submit anything to this guc_id until
 * the deregister completes, so a fence is used to stall all requests associated
 * with this guc_id until the corresponding G2H returns indicating the guc_id
 * has been deregistered.
 *
 * submission_state.guc_ids:
 * Unique number associated with private GuC context data passed in during
 * context registration / submission / deregistration. 64k available. Simple ida
 * is used for allocation.
 *
 * Stealing guc_ids:
 * If no guc_ids are available they can be stolen from another context at
 * request creation time if that context is unpinned. If a guc_id can't be found
 * we punt this problem to the user as we believe this is near impossible to hit
 * during normal use cases.
 *
 * Locking:
 * In the GuC submission code we have 3 basic spin locks which protect
 * everything. Details about each below.
 *
 * sched_engine->lock
 * This is the submission lock for all contexts that share an i915 scheduling
 * engine (sched_engine), thus only one of the contexts which share a
 * sched_engine can be submitting at a time. Currently only one sched_engine is
 * used for all of GuC submission but that could change in the future.
 *
 * guc->submission_state.lock
 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
 * list.
 *
 * ce->guc_state.lock
 * Protects everything under ce->guc_state. Ensures that a context is in the
 * correct state before issuing a H2G, e.g. we don't issue a schedule disable
 * on a disabled context (bad idea), we don't issue a schedule enable when a
 * schedule disable is in flight, etc... Also protects the list of inflight
 * requests on the context and the priority management state. The lock is
 * individual to each context.
 *
 * Lock ordering rules:
 * sched_engine->lock -> ce->guc_state.lock
 * guc->submission_state.lock -> ce->guc_state.lock
 *
 * Reset races:
 * When a full GT reset is triggered it is assumed that some G2H responses to
 * H2Gs can be lost as the GuC is also reset. Losing these G2Hs can prove to be
 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
 * contexts, release guc_ids, etc...). When this occurs we can scrub the
 * context state and clean up appropriately, however this is quite racy.
 * To avoid races, the reset code must disable submission before scrubbing for
 * the missing G2H, while the submission code must check for submission being
 * disabled and skip sending H2Gs and updating context states when it is. Both
 * sides must also make sure to hold the relevant locks.
 */

/* GuC Virtual Engine */
struct guc_virtual_engine {
	struct intel_engine_cs base;
	struct intel_context context;
};

static struct intel_context *
guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
		   unsigned long flags);

static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
		    unsigned int num_siblings,
		    unsigned int width);

#define GUC_REQUEST_SIZE 64 /* bytes */

/*
 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
 * per the GuC submission interface. A different allocation algorithm is used
 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
 * partition the guc_id space.
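 * (With the 64k guc_ids noted above, this reserves 64k / 16 = 4k ids for
 * multi-lrc.)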
We believe the number of multi-lrc contexts in 157 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for 158 * multi-lrc. 159 */ 160 #define NUMBER_MULTI_LRC_GUC_ID(guc) \ 161 ((guc)->submission_state.num_guc_ids / 16) 162 163 /* 164 * Below is a set of functions which control the GuC scheduling state which 165 * require a lock. 166 */ 167 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) 168 #define SCHED_STATE_DESTROYED BIT(1) 169 #define SCHED_STATE_PENDING_DISABLE BIT(2) 170 #define SCHED_STATE_BANNED BIT(3) 171 #define SCHED_STATE_ENABLED BIT(4) 172 #define SCHED_STATE_PENDING_ENABLE BIT(5) 173 #define SCHED_STATE_REGISTERED BIT(6) 174 #define SCHED_STATE_POLICY_REQUIRED BIT(7) 175 #define SCHED_STATE_CLOSED BIT(8) 176 #define SCHED_STATE_BLOCKED_SHIFT 9 177 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) 178 #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) 179 180 static inline void init_sched_state(struct intel_context *ce) 181 { 182 lockdep_assert_held(&ce->guc_state.lock); 183 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; 184 } 185 186 /* 187 * Kernel contexts can have SCHED_STATE_REGISTERED after suspend. 188 * A context close can race with the submission path, so SCHED_STATE_CLOSED 189 * can be set immediately before we try to register. 190 */ 191 #define SCHED_STATE_VALID_INIT \ 192 (SCHED_STATE_BLOCKED_MASK | \ 193 SCHED_STATE_CLOSED | \ 194 SCHED_STATE_REGISTERED) 195 196 __maybe_unused 197 static bool sched_state_is_init(struct intel_context *ce) 198 { 199 return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT); 200 } 201 202 static inline bool 203 context_wait_for_deregister_to_register(struct intel_context *ce) 204 { 205 return ce->guc_state.sched_state & 206 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 207 } 208 209 static inline void 210 set_context_wait_for_deregister_to_register(struct intel_context *ce) 211 { 212 lockdep_assert_held(&ce->guc_state.lock); 213 ce->guc_state.sched_state |= 214 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 215 } 216 217 static inline void 218 clr_context_wait_for_deregister_to_register(struct intel_context *ce) 219 { 220 lockdep_assert_held(&ce->guc_state.lock); 221 ce->guc_state.sched_state &= 222 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; 223 } 224 225 static inline bool 226 context_destroyed(struct intel_context *ce) 227 { 228 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED; 229 } 230 231 static inline void 232 set_context_destroyed(struct intel_context *ce) 233 { 234 lockdep_assert_held(&ce->guc_state.lock); 235 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; 236 } 237 238 static inline bool context_pending_disable(struct intel_context *ce) 239 { 240 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; 241 } 242 243 static inline void set_context_pending_disable(struct intel_context *ce) 244 { 245 lockdep_assert_held(&ce->guc_state.lock); 246 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE; 247 } 248 249 static inline void clr_context_pending_disable(struct intel_context *ce) 250 { 251 lockdep_assert_held(&ce->guc_state.lock); 252 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE; 253 } 254 255 static inline bool context_banned(struct intel_context *ce) 256 { 257 return ce->guc_state.sched_state & SCHED_STATE_BANNED; 258 } 259 260 static inline void set_context_banned(struct intel_context *ce) 261 { 262 lockdep_assert_held(&ce->guc_state.lock); 263 ce->guc_state.sched_state |= SCHED_STATE_BANNED; 264 } 265 266 
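
/*
 * Illustrative summary only (not from the original file): a typical single-lrc
 * context walks the bits above as registered -> (pending) enabled ->
 * (pending) disabled -> destroyed, with every transition made under
 * ce->guc_state.lock via the helpers in this block and the pending bits
 * cleared once the corresponding G2H arrives.
 */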
static inline void clr_context_banned(struct intel_context *ce) 267 { 268 lockdep_assert_held(&ce->guc_state.lock); 269 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; 270 } 271 272 static inline bool context_enabled(struct intel_context *ce) 273 { 274 return ce->guc_state.sched_state & SCHED_STATE_ENABLED; 275 } 276 277 static inline void set_context_enabled(struct intel_context *ce) 278 { 279 lockdep_assert_held(&ce->guc_state.lock); 280 ce->guc_state.sched_state |= SCHED_STATE_ENABLED; 281 } 282 283 static inline void clr_context_enabled(struct intel_context *ce) 284 { 285 lockdep_assert_held(&ce->guc_state.lock); 286 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; 287 } 288 289 static inline bool context_pending_enable(struct intel_context *ce) 290 { 291 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; 292 } 293 294 static inline void set_context_pending_enable(struct intel_context *ce) 295 { 296 lockdep_assert_held(&ce->guc_state.lock); 297 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; 298 } 299 300 static inline void clr_context_pending_enable(struct intel_context *ce) 301 { 302 lockdep_assert_held(&ce->guc_state.lock); 303 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; 304 } 305 306 static inline bool context_registered(struct intel_context *ce) 307 { 308 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; 309 } 310 311 static inline void set_context_registered(struct intel_context *ce) 312 { 313 lockdep_assert_held(&ce->guc_state.lock); 314 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; 315 } 316 317 static inline void clr_context_registered(struct intel_context *ce) 318 { 319 lockdep_assert_held(&ce->guc_state.lock); 320 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; 321 } 322 323 static inline bool context_policy_required(struct intel_context *ce) 324 { 325 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED; 326 } 327 328 static inline void set_context_policy_required(struct intel_context *ce) 329 { 330 lockdep_assert_held(&ce->guc_state.lock); 331 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED; 332 } 333 334 static inline void clr_context_policy_required(struct intel_context *ce) 335 { 336 lockdep_assert_held(&ce->guc_state.lock); 337 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED; 338 } 339 340 static inline bool context_close_done(struct intel_context *ce) 341 { 342 return ce->guc_state.sched_state & SCHED_STATE_CLOSED; 343 } 344 345 static inline void set_context_close_done(struct intel_context *ce) 346 { 347 lockdep_assert_held(&ce->guc_state.lock); 348 ce->guc_state.sched_state |= SCHED_STATE_CLOSED; 349 } 350 351 static inline u32 context_blocked(struct intel_context *ce) 352 { 353 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> 354 SCHED_STATE_BLOCKED_SHIFT; 355 } 356 357 static inline void incr_context_blocked(struct intel_context *ce) 358 { 359 lockdep_assert_held(&ce->guc_state.lock); 360 361 ce->guc_state.sched_state += SCHED_STATE_BLOCKED; 362 363 GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */ 364 } 365 366 static inline void decr_context_blocked(struct intel_context *ce) 367 { 368 lockdep_assert_held(&ce->guc_state.lock); 369 370 GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ 371 372 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; 373 } 374 375 static struct intel_context * 376 request_to_scheduling_context(struct i915_request *rq) 377 { 378 return intel_context_to_parent(rq->context); 379 } 380 381 static inline bool 
context_guc_id_invalid(struct intel_context *ce) 382 { 383 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID; 384 } 385 386 static inline void set_context_guc_id_invalid(struct intel_context *ce) 387 { 388 ce->guc_id.id = GUC_INVALID_CONTEXT_ID; 389 } 390 391 static inline struct intel_guc *ce_to_guc(struct intel_context *ce) 392 { 393 return &ce->engine->gt->uc.guc; 394 } 395 396 static inline struct i915_priolist *to_priolist(struct rb_node *rb) 397 { 398 return rb_entry(rb, struct i915_priolist, node); 399 } 400 401 /* 402 * When using multi-lrc submission a scratch memory area is reserved in the 403 * parent's context state for the process descriptor, work queue, and handshake 404 * between the parent + children contexts to insert safe preemption points 405 * between each of the BBs. Currently the scratch area is sized to a page. 406 * 407 * The layout of this scratch area is below: 408 * 0 guc_process_desc 409 * + sizeof(struct guc_process_desc) child go 410 * + CACHELINE_BYTES child join[0] 411 * ... 412 * + CACHELINE_BYTES child join[n - 1] 413 * ... unused 414 * PARENT_SCRATCH_SIZE / 2 work queue start 415 * ... work queue 416 * PARENT_SCRATCH_SIZE - 1 work queue end 417 */ 418 #define WQ_SIZE (PARENT_SCRATCH_SIZE / 2) 419 #define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE) 420 421 struct sync_semaphore { 422 u32 semaphore; 423 u8 unused[CACHELINE_BYTES - sizeof(u32)]; 424 }; 425 426 struct parent_scratch { 427 union guc_descs { 428 struct guc_sched_wq_desc wq_desc; 429 struct guc_process_desc_v69 pdesc; 430 } descs; 431 432 struct sync_semaphore go; 433 struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; 434 435 u8 unused[WQ_OFFSET - sizeof(union guc_descs) - 436 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; 437 438 u32 wq[WQ_SIZE / sizeof(u32)]; 439 }; 440 441 static u32 __get_parent_scratch_offset(struct intel_context *ce) 442 { 443 GEM_BUG_ON(!ce->parallel.guc.parent_page); 444 445 return ce->parallel.guc.parent_page * PAGE_SIZE; 446 } 447 448 static u32 __get_wq_offset(struct intel_context *ce) 449 { 450 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET); 451 452 return __get_parent_scratch_offset(ce) + WQ_OFFSET; 453 } 454 455 static struct parent_scratch * 456 __get_parent_scratch(struct intel_context *ce) 457 { 458 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE); 459 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES); 460 461 /* 462 * Need to subtract LRC_STATE_OFFSET here as the 463 * parallel.guc.parent_page is the offset into ce->state while 464 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET. 465 */ 466 return (struct parent_scratch *) 467 (ce->lrc_reg_state + 468 ((__get_parent_scratch_offset(ce) - 469 LRC_STATE_OFFSET) / sizeof(u32))); 470 } 471 472 static struct guc_process_desc_v69 * 473 __get_process_desc_v69(struct intel_context *ce) 474 { 475 struct parent_scratch *ps = __get_parent_scratch(ce); 476 477 return &ps->descs.pdesc; 478 } 479 480 static struct guc_sched_wq_desc * 481 __get_wq_desc_v70(struct intel_context *ce) 482 { 483 struct parent_scratch *ps = __get_parent_scratch(ce); 484 485 return &ps->descs.wq_desc; 486 } 487 488 static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) 489 { 490 /* 491 * Check for space in work queue. Caching a value of head pointer in 492 * intel_context structure in order reduce the number accesses to shared 493 * GPU memory which may be across a PCIe bus. 
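	 *
	 * (CIRC_SPACE() below comes from <linux/circ_buf.h>: given a producer
	 * index, a consumer index and the buffer size it returns how much can
	 * be written without the producer catching up with the consumer,
	 * always keeping one byte free. Here the producer index is wqi_tail
	 * and the consumer index, advanced by the GuC, is wqi_head.)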
494 */ 495 #define AVAILABLE_SPACE \ 496 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) 497 if (wqi_size > AVAILABLE_SPACE) { 498 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); 499 500 if (wqi_size > AVAILABLE_SPACE) 501 return NULL; 502 } 503 #undef AVAILABLE_SPACE 504 505 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)]; 506 } 507 508 static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) 509 { 510 struct intel_context *ce = xa_load(&guc->context_lookup, id); 511 512 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID); 513 514 return ce; 515 } 516 517 static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) 518 { 519 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; 520 521 if (!base) 522 return NULL; 523 524 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); 525 526 return &base[index]; 527 } 528 529 static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) 530 { 531 u32 size; 532 int ret; 533 534 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * 535 GUC_MAX_CONTEXT_ID); 536 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, 537 (void **)&guc->lrc_desc_pool_vaddr_v69); 538 if (ret) 539 return ret; 540 541 return 0; 542 } 543 544 static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) 545 { 546 if (!guc->lrc_desc_pool_vaddr_v69) 547 return; 548 549 guc->lrc_desc_pool_vaddr_v69 = NULL; 550 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); 551 } 552 553 static inline bool guc_submission_initialized(struct intel_guc *guc) 554 { 555 return guc->submission_initialized; 556 } 557 558 static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) 559 { 560 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); 561 562 if (desc) 563 memset(desc, 0, sizeof(*desc)); 564 } 565 566 static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) 567 { 568 return __get_context(guc, id); 569 } 570 571 static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id, 572 struct intel_context *ce) 573 { 574 unsigned long flags; 575 576 /* 577 * xarray API doesn't have xa_save_irqsave wrapper, so calling the 578 * lower level functions directly. 579 */ 580 xa_lock_irqsave(&guc->context_lookup, flags); 581 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC); 582 xa_unlock_irqrestore(&guc->context_lookup, flags); 583 } 584 585 static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) 586 { 587 unsigned long flags; 588 589 if (unlikely(!guc_submission_initialized(guc))) 590 return; 591 592 _reset_lrc_desc_v69(guc, id); 593 594 /* 595 * xarray API doesn't have xa_erase_irqsave wrapper, so calling 596 * the lower level functions directly. 597 */ 598 xa_lock_irqsave(&guc->context_lookup, flags); 599 __xa_erase(&guc->context_lookup, id); 600 xa_unlock_irqrestore(&guc->context_lookup, flags); 601 } 602 603 static void decr_outstanding_submission_g2h(struct intel_guc *guc) 604 { 605 if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) 606 wake_up_all(&guc->ct.wq); 607 } 608 609 static int guc_submission_send_busy_loop(struct intel_guc *guc, 610 const u32 *action, 611 u32 len, 612 u32 g2h_len_dw, 613 bool loop) 614 { 615 /* 616 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), 617 * so we don't handle the case where we don't get a reply because we 618 * aborted the send due to the channel being busy. 
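	 *
	 * (With loop set, intel_guc_send_busy_loop() absorbs -EBUSY by
	 * sleeping and retrying until the CTB accepts the message; other
	 * errors are returned to the caller.)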
619 */ 620 GEM_BUG_ON(g2h_len_dw && !loop); 621 622 if (g2h_len_dw) 623 atomic_inc(&guc->outstanding_submission_g2h); 624 625 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); 626 } 627 628 int intel_guc_wait_for_pending_msg(struct intel_guc *guc, 629 atomic_t *wait_var, 630 bool interruptible, 631 long timeout) 632 { 633 const int state = interruptible ? 634 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; 635 DEFINE_WAIT(wait); 636 637 might_sleep(); 638 GEM_BUG_ON(timeout < 0); 639 640 if (!atomic_read(wait_var)) 641 return 0; 642 643 if (!timeout) 644 return -ETIME; 645 646 for (;;) { 647 prepare_to_wait(&guc->ct.wq, &wait, state); 648 649 if (!atomic_read(wait_var)) 650 break; 651 652 if (signal_pending_state(state, current)) { 653 timeout = -EINTR; 654 break; 655 } 656 657 if (!timeout) { 658 timeout = -ETIME; 659 break; 660 } 661 662 timeout = io_schedule_timeout(timeout); 663 } 664 finish_wait(&guc->ct.wq, &wait); 665 666 return (timeout < 0) ? timeout : 0; 667 } 668 669 int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) 670 { 671 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc)) 672 return 0; 673 674 return intel_guc_wait_for_pending_msg(guc, 675 &guc->outstanding_submission_g2h, 676 true, timeout); 677 } 678 679 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); 680 static int try_context_registration(struct intel_context *ce, bool loop); 681 682 static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) 683 { 684 int err = 0; 685 struct intel_context *ce = request_to_scheduling_context(rq); 686 u32 action[3]; 687 int len = 0; 688 u32 g2h_len_dw = 0; 689 bool enabled; 690 691 lockdep_assert_held(&rq->engine->sched_engine->lock); 692 693 /* 694 * Corner case where requests were sitting in the priority list or a 695 * request resubmitted after the context was banned. 696 */ 697 if (unlikely(!intel_context_is_schedulable(ce))) { 698 i915_request_put(i915_request_mark_eio(rq)); 699 intel_engine_signal_breadcrumbs(ce->engine); 700 return 0; 701 } 702 703 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 704 GEM_BUG_ON(context_guc_id_invalid(ce)); 705 706 if (context_policy_required(ce)) { 707 err = guc_context_policy_init_v70(ce, false); 708 if (err) 709 return err; 710 } 711 712 spin_lock(&ce->guc_state.lock); 713 714 /* 715 * The request / context will be run on the hardware when scheduling 716 * gets enabled in the unblock. For multi-lrc we still submit the 717 * context to move the LRC tails. 718 */ 719 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce))) 720 goto out; 721 722 enabled = context_enabled(ce) || context_blocked(ce); 723 724 if (!enabled) { 725 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; 726 action[len++] = ce->guc_id.id; 727 action[len++] = GUC_CONTEXT_ENABLE; 728 set_context_pending_enable(ce); 729 intel_context_get(ce); 730 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; 731 } else { 732 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; 733 action[len++] = ce->guc_id.id; 734 } 735 736 err = intel_guc_send_nb(guc, action, len, g2h_len_dw); 737 if (!enabled && !err) { 738 trace_intel_context_sched_enable(ce); 739 atomic_inc(&guc->outstanding_submission_g2h); 740 set_context_enabled(ce); 741 742 /* 743 * Without multi-lrc KMD does the submission step (moving the 744 * lrc tail) so enabling scheduling is sufficient to submit the 745 * context. 
This isn't the case in multi-lrc submission as the 746 * GuC needs to move the tails, hence the need for another H2G 747 * to submit a multi-lrc context after enabling scheduling. 748 */ 749 if (intel_context_is_parent(ce)) { 750 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT; 751 err = intel_guc_send_nb(guc, action, len - 1, 0); 752 } 753 } else if (!enabled) { 754 clr_context_pending_enable(ce); 755 intel_context_put(ce); 756 } 757 if (likely(!err)) 758 trace_i915_request_guc_submit(rq); 759 760 out: 761 spin_unlock(&ce->guc_state.lock); 762 return err; 763 } 764 765 static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) 766 { 767 int ret = __guc_add_request(guc, rq); 768 769 if (unlikely(ret == -EBUSY)) { 770 guc->stalled_request = rq; 771 guc->submission_stall_reason = STALL_ADD_REQUEST; 772 } 773 774 return ret; 775 } 776 777 static inline void guc_set_lrc_tail(struct i915_request *rq) 778 { 779 rq->context->lrc_reg_state[CTX_RING_TAIL] = 780 intel_ring_set_tail(rq->ring, rq->tail); 781 } 782 783 static inline int rq_prio(const struct i915_request *rq) 784 { 785 return rq->sched.attr.priority; 786 } 787 788 static bool is_multi_lrc_rq(struct i915_request *rq) 789 { 790 return intel_context_is_parallel(rq->context); 791 } 792 793 static bool can_merge_rq(struct i915_request *rq, 794 struct i915_request *last) 795 { 796 return request_to_scheduling_context(rq) == 797 request_to_scheduling_context(last); 798 } 799 800 static u32 wq_space_until_wrap(struct intel_context *ce) 801 { 802 return (WQ_SIZE - ce->parallel.guc.wqi_tail); 803 } 804 805 static void write_wqi(struct intel_context *ce, u32 wqi_size) 806 { 807 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); 808 809 /* 810 * Ensure WQI are visible before updating tail 811 */ 812 intel_guc_write_barrier(ce_to_guc(ce)); 813 814 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & 815 (WQ_SIZE - 1); 816 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); 817 } 818 819 static int guc_wq_noop_append(struct intel_context *ce) 820 { 821 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); 822 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; 823 824 if (!wqi) 825 return -EBUSY; 826 827 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 828 829 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | 830 FIELD_PREP(WQ_LEN_MASK, len_dw); 831 ce->parallel.guc.wqi_tail = 0; 832 833 return 0; 834 } 835 836 static int __guc_wq_item_append(struct i915_request *rq) 837 { 838 struct intel_context *ce = request_to_scheduling_context(rq); 839 struct intel_context *child; 840 unsigned int wqi_size = (ce->parallel.number_children + 4) * 841 sizeof(u32); 842 u32 *wqi; 843 u32 len_dw = (wqi_size / sizeof(u32)) - 1; 844 int ret; 845 846 /* Ensure context is in correct state updating work queue */ 847 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); 848 GEM_BUG_ON(context_guc_id_invalid(ce)); 849 GEM_BUG_ON(context_wait_for_deregister_to_register(ce)); 850 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)); 851 852 /* Insert NOOP if this work queue item will wrap the tail pointer. 
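	 * The NOOP's length field covers the remaining space up to the end of
	 * the buffer and guc_wq_noop_append() resets wqi_tail to 0, so the
	 * real item below is then written contiguously from the start.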
*/ 853 if (wqi_size > wq_space_until_wrap(ce)) { 854 ret = guc_wq_noop_append(ce); 855 if (ret) 856 return ret; 857 } 858 859 wqi = get_wq_pointer(ce, wqi_size); 860 if (!wqi) 861 return -EBUSY; 862 863 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw)); 864 865 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | 866 FIELD_PREP(WQ_LEN_MASK, len_dw); 867 *wqi++ = ce->lrc.lrca; 868 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) | 869 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64)); 870 *wqi++ = 0; /* fence_id */ 871 for_each_child(ce, child) 872 *wqi++ = child->ring->tail / sizeof(u64); 873 874 write_wqi(ce, wqi_size); 875 876 return 0; 877 } 878 879 static int guc_wq_item_append(struct intel_guc *guc, 880 struct i915_request *rq) 881 { 882 struct intel_context *ce = request_to_scheduling_context(rq); 883 int ret; 884 885 if (unlikely(!intel_context_is_schedulable(ce))) 886 return 0; 887 888 ret = __guc_wq_item_append(rq); 889 if (unlikely(ret == -EBUSY)) { 890 guc->stalled_request = rq; 891 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL; 892 } 893 894 return ret; 895 } 896 897 static bool multi_lrc_submit(struct i915_request *rq) 898 { 899 struct intel_context *ce = request_to_scheduling_context(rq); 900 901 intel_ring_set_tail(rq->ring, rq->tail); 902 903 /* 904 * We expect the front end (execbuf IOCTL) to set this flag on the last 905 * request generated from a multi-BB submission. This indicates to the 906 * backend (GuC interface) that we should submit this context thus 907 * submitting all the requests generated in parallel. 908 */ 909 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) || 910 !intel_context_is_schedulable(ce); 911 } 912 913 static int guc_dequeue_one_context(struct intel_guc *guc) 914 { 915 struct i915_sched_engine * const sched_engine = guc->sched_engine; 916 struct i915_request *last = NULL; 917 bool submit = false; 918 struct rb_node *rb; 919 int ret; 920 921 lockdep_assert_held(&sched_engine->lock); 922 923 if (guc->stalled_request) { 924 submit = true; 925 last = guc->stalled_request; 926 927 switch (guc->submission_stall_reason) { 928 case STALL_REGISTER_CONTEXT: 929 goto register_context; 930 case STALL_MOVE_LRC_TAIL: 931 goto move_lrc_tail; 932 case STALL_ADD_REQUEST: 933 goto add_request; 934 default: 935 MISSING_CASE(guc->submission_stall_reason); 936 } 937 } 938 939 while ((rb = rb_first_cached(&sched_engine->queue))) { 940 struct i915_priolist *p = to_priolist(rb); 941 struct i915_request *rq, *rn; 942 943 priolist_for_each_request_consume(rq, rn, p) { 944 if (last && !can_merge_rq(rq, last)) 945 goto register_context; 946 947 list_del_init(&rq->sched.link); 948 949 __i915_request_submit(rq); 950 951 trace_i915_request_in(rq, 0); 952 last = rq; 953 954 if (is_multi_lrc_rq(rq)) { 955 /* 956 * We need to coalesce all multi-lrc requests in 957 * a relationship into a single H2G. We are 958 * guaranteed that all of these requests will be 959 * submitted sequentially. 
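			 * Only the request flagged by the execbuf IOCTL as the
			 * last of the parallel set (see multi_lrc_submit()
			 * above) triggers the actual submission; the earlier
			 * siblings only have their ring tails updated.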
960 */ 961 if (multi_lrc_submit(rq)) { 962 submit = true; 963 goto register_context; 964 } 965 } else { 966 submit = true; 967 } 968 } 969 970 rb_erase_cached(&p->node, &sched_engine->queue); 971 i915_priolist_free(p); 972 } 973 974 register_context: 975 if (submit) { 976 struct intel_context *ce = request_to_scheduling_context(last); 977 978 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) && 979 intel_context_is_schedulable(ce))) { 980 ret = try_context_registration(ce, false); 981 if (unlikely(ret == -EPIPE)) { 982 goto deadlk; 983 } else if (ret == -EBUSY) { 984 guc->stalled_request = last; 985 guc->submission_stall_reason = 986 STALL_REGISTER_CONTEXT; 987 goto schedule_tasklet; 988 } else if (ret != 0) { 989 GEM_WARN_ON(ret); /* Unexpected */ 990 goto deadlk; 991 } 992 } 993 994 move_lrc_tail: 995 if (is_multi_lrc_rq(last)) { 996 ret = guc_wq_item_append(guc, last); 997 if (ret == -EBUSY) { 998 goto schedule_tasklet; 999 } else if (ret != 0) { 1000 GEM_WARN_ON(ret); /* Unexpected */ 1001 goto deadlk; 1002 } 1003 } else { 1004 guc_set_lrc_tail(last); 1005 } 1006 1007 add_request: 1008 ret = guc_add_request(guc, last); 1009 if (unlikely(ret == -EPIPE)) { 1010 goto deadlk; 1011 } else if (ret == -EBUSY) { 1012 goto schedule_tasklet; 1013 } else if (ret != 0) { 1014 GEM_WARN_ON(ret); /* Unexpected */ 1015 goto deadlk; 1016 } 1017 } 1018 1019 guc->stalled_request = NULL; 1020 guc->submission_stall_reason = STALL_NONE; 1021 return submit; 1022 1023 deadlk: 1024 sched_engine->tasklet.callback = NULL; 1025 tasklet_disable_nosync(&sched_engine->tasklet); 1026 return false; 1027 1028 schedule_tasklet: 1029 tasklet_schedule(&sched_engine->tasklet); 1030 return false; 1031 } 1032 1033 static void guc_submission_tasklet(struct tasklet_struct *t) 1034 { 1035 struct i915_sched_engine *sched_engine = 1036 from_tasklet(sched_engine, t, tasklet); 1037 unsigned long flags; 1038 bool loop; 1039 1040 spin_lock_irqsave(&sched_engine->lock, flags); 1041 1042 do { 1043 loop = guc_dequeue_one_context(sched_engine->private_data); 1044 } while (loop); 1045 1046 i915_sched_engine_reset_on_empty(sched_engine); 1047 1048 spin_unlock_irqrestore(&sched_engine->lock, flags); 1049 } 1050 1051 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir) 1052 { 1053 if (iir & GT_RENDER_USER_INTERRUPT) 1054 intel_engine_signal_breadcrumbs(engine); 1055 } 1056 1057 static void __guc_context_destroy(struct intel_context *ce); 1058 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); 1059 static void guc_signal_context_fence(struct intel_context *ce); 1060 static void guc_cancel_context_requests(struct intel_context *ce); 1061 static void guc_blocked_fence_complete(struct intel_context *ce); 1062 1063 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) 1064 { 1065 struct intel_context *ce; 1066 unsigned long index, flags; 1067 bool pending_disable, pending_enable, deregister, destroyed, banned; 1068 1069 xa_lock_irqsave(&guc->context_lookup, flags); 1070 xa_for_each(&guc->context_lookup, index, ce) { 1071 /* 1072 * Corner case where the ref count on the object is zero but and 1073 * deregister G2H was lost. In this case we don't touch the ref 1074 * count and finish the destroy of the context. 
1075 */ 1076 bool do_put = kref_get_unless_zero(&ce->ref); 1077 1078 xa_unlock(&guc->context_lookup); 1079 1080 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 1081 (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) { 1082 /* successful cancel so jump straight to close it */ 1083 intel_context_sched_disable_unpin(ce); 1084 } 1085 1086 spin_lock(&ce->guc_state.lock); 1087 1088 /* 1089 * Once we are at this point submission_disabled() is guaranteed 1090 * to be visible to all callers who set the below flags (see above 1091 * flush and flushes in reset_prepare). If submission_disabled() 1092 * is set, the caller shouldn't set these flags. 1093 */ 1094 1095 destroyed = context_destroyed(ce); 1096 pending_enable = context_pending_enable(ce); 1097 pending_disable = context_pending_disable(ce); 1098 deregister = context_wait_for_deregister_to_register(ce); 1099 banned = context_banned(ce); 1100 init_sched_state(ce); 1101 1102 spin_unlock(&ce->guc_state.lock); 1103 1104 if (pending_enable || destroyed || deregister) { 1105 decr_outstanding_submission_g2h(guc); 1106 if (deregister) 1107 guc_signal_context_fence(ce); 1108 if (destroyed) { 1109 intel_gt_pm_put_async(guc_to_gt(guc)); 1110 release_guc_id(guc, ce); 1111 __guc_context_destroy(ce); 1112 } 1113 if (pending_enable || deregister) 1114 intel_context_put(ce); 1115 } 1116 1117 /* Not mutualy exclusive with above if statement. */ 1118 if (pending_disable) { 1119 guc_signal_context_fence(ce); 1120 if (banned) { 1121 guc_cancel_context_requests(ce); 1122 intel_engine_signal_breadcrumbs(ce->engine); 1123 } 1124 intel_context_sched_disable_unpin(ce); 1125 decr_outstanding_submission_g2h(guc); 1126 1127 spin_lock(&ce->guc_state.lock); 1128 guc_blocked_fence_complete(ce); 1129 spin_unlock(&ce->guc_state.lock); 1130 1131 intel_context_put(ce); 1132 } 1133 1134 if (do_put) 1135 intel_context_put(ce); 1136 xa_lock(&guc->context_lookup); 1137 } 1138 xa_unlock_irqrestore(&guc->context_lookup, flags); 1139 } 1140 1141 /* 1142 * GuC stores busyness stats for each engine at context in/out boundaries. A 1143 * context 'in' logs execution start time, 'out' adds in -> out delta to total. 1144 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with 1145 * GuC. 1146 * 1147 * __i915_pmu_event_read samples engine busyness. When sampling, if context id 1148 * is valid (!= ~0) and start is non-zero, the engine is considered to be 1149 * active. For an active engine total busyness = total + (now - start), where 1150 * 'now' is the time at which the busyness is sampled. For inactive engine, 1151 * total busyness = total. 1152 * 1153 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain. 1154 * 1155 * The start and total values provided by GuC are 32 bits and wrap around in a 1156 * few minutes. Since perf pmu provides busyness as 64 bit monotonically 1157 * increasing ns values, there is a need for this implementation to account for 1158 * overflows and extend the GuC provided values to 64 bits before returning 1159 * busyness to the user. In order to do that, a worker runs periodically at 1160 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in 1161 * 27 seconds for a gt clock frequency of 19.2 MHz). 
 */

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
	u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
	u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

	if (new_start == lower_32_bits(*prev_start))
		return;

	/*
	 * When gt is unparked, we update the gt timestamp and start the ping
	 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
	 * is unparked, all switched in contexts will have a start time that is
	 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
	 *
	 * If neither gt_stamp nor new_start has rolled over, then the
	 * gt_stamp_hi does not need to be adjusted, however if one of them has
	 * rolled over, we need to adjust gt_stamp_hi accordingly.
	 *
	 * The below conditions address the cases of new_start rollover and
	 * gt_stamp_last rollover respectively.
	 */
	if (new_start < gt_stamp_last &&
	    (new_start - gt_stamp_last) <= POLL_TIME_CLKS)
		gt_stamp_hi++;

	if (new_start > gt_stamp_last &&
	    (gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
		gt_stamp_hi--;

	*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

#define record_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)

/*
 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
 * we run into a race where the value read is inconsistent. Sometimes the
 * inconsistency is in reading the upper MSB bytes of the last_in value when
 * this race occurs. Two types of cases are seen: the upper 8 bits are zero or
 * the upper 24 bits are zero. Since the truncated values are still non-zero,
 * it is non-trivial to determine their validity. Instead we read the values
 * multiple times until they are consistent. In test runs, 3 attempts result in
 * consistent values. The upper bound is set to 6 attempts and may need to be
 * tuned as per any new occurrences.
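 *
 * (The loop below is a plain read / re-read check: sample all three fields,
 * sample them again and only accept the snapshot once two consecutive reads
 * agree or the attempt limit is hit.)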
1213 */ 1214 static void __get_engine_usage_record(struct intel_engine_cs *engine, 1215 u32 *last_in, u32 *id, u32 *total) 1216 { 1217 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1218 int i = 0; 1219 1220 do { 1221 *last_in = record_read(&rec_map, last_switch_in_stamp); 1222 *id = record_read(&rec_map, current_context_index); 1223 *total = record_read(&rec_map, total_runtime); 1224 1225 if (record_read(&rec_map, last_switch_in_stamp) == *last_in && 1226 record_read(&rec_map, current_context_index) == *id && 1227 record_read(&rec_map, total_runtime) == *total) 1228 break; 1229 } while (++i < 6); 1230 } 1231 1232 static void __set_engine_usage_record(struct intel_engine_cs *engine, 1233 u32 last_in, u32 id, u32 total) 1234 { 1235 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); 1236 1237 #define record_write(map_, field_, val_) \ 1238 iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_) 1239 1240 record_write(&rec_map, last_switch_in_stamp, last_in); 1241 record_write(&rec_map, current_context_index, id); 1242 record_write(&rec_map, total_runtime, total); 1243 1244 #undef record_write 1245 } 1246 1247 static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) 1248 { 1249 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1250 struct intel_guc *guc = &engine->gt->uc.guc; 1251 u32 last_switch, ctx_id, total; 1252 1253 lockdep_assert_held(&guc->timestamp.lock); 1254 1255 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); 1256 1257 stats->running = ctx_id != ~0U && last_switch; 1258 if (stats->running) 1259 __extend_last_switch(guc, &stats->start_gt_clk, last_switch); 1260 1261 /* 1262 * Instead of adjusting the total for overflow, just add the 1263 * difference from previous sample stats->total_gt_clks 1264 */ 1265 if (total && total != ~0U) { 1266 stats->total_gt_clks += (u32)(total - stats->prev_total); 1267 stats->prev_total = total; 1268 } 1269 } 1270 1271 static u32 gpm_timestamp_shift(struct intel_gt *gt) 1272 { 1273 intel_wakeref_t wakeref; 1274 u32 reg, shift; 1275 1276 with_intel_runtime_pm(gt->uncore->rpm, wakeref) 1277 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); 1278 1279 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> 1280 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; 1281 1282 return 3 - shift; 1283 } 1284 1285 static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) 1286 { 1287 struct intel_gt *gt = guc_to_gt(guc); 1288 u32 gt_stamp_lo, gt_stamp_hi; 1289 u64 gpm_ts; 1290 1291 lockdep_assert_held(&guc->timestamp.lock); 1292 1293 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); 1294 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0, 1295 MISC_STATUS1) >> guc->timestamp.shift; 1296 gt_stamp_lo = lower_32_bits(gpm_ts); 1297 *now = ktime_get(); 1298 1299 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) 1300 gt_stamp_hi++; 1301 1302 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; 1303 } 1304 1305 /* 1306 * Unlike the execlist mode of submission total and active times are in terms of 1307 * gt clocks. The *now parameter is retained to return the cpu time at which the 1308 * busyness was sampled. 
1309 */ 1310 static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) 1311 { 1312 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc; 1313 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error; 1314 struct intel_gt *gt = engine->gt; 1315 struct intel_guc *guc = >->uc.guc; 1316 u64 total, gt_stamp_saved; 1317 unsigned long flags; 1318 u32 reset_count; 1319 bool in_reset; 1320 1321 spin_lock_irqsave(&guc->timestamp.lock, flags); 1322 1323 /* 1324 * If a reset happened, we risk reading partially updated engine 1325 * busyness from GuC, so we just use the driver stored copy of busyness. 1326 * Synchronize with gt reset using reset_count and the 1327 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count 1328 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is 1329 * usable by checking the flag afterwards. 1330 */ 1331 reset_count = i915_reset_count(gpu_error); 1332 in_reset = test_bit(I915_RESET_BACKOFF, >->reset.flags); 1333 1334 *now = ktime_get(); 1335 1336 /* 1337 * The active busyness depends on start_gt_clk and gt_stamp. 1338 * gt_stamp is updated by i915 only when gt is awake and the 1339 * start_gt_clk is derived from GuC state. To get a consistent 1340 * view of activity, we query the GuC state only if gt is awake. 1341 */ 1342 if (!in_reset && intel_gt_pm_get_if_awake(gt)) { 1343 stats_saved = *stats; 1344 gt_stamp_saved = guc->timestamp.gt_stamp; 1345 /* 1346 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - 1347 * start_gt_clk' calculation below for active engines. 1348 */ 1349 guc_update_engine_gt_clks(engine); 1350 guc_update_pm_timestamp(guc, now); 1351 intel_gt_pm_put_async(gt); 1352 if (i915_reset_count(gpu_error) != reset_count) { 1353 *stats = stats_saved; 1354 guc->timestamp.gt_stamp = gt_stamp_saved; 1355 } 1356 } 1357 1358 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks); 1359 if (stats->running) { 1360 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1361 1362 total += intel_gt_clock_interval_to_ns(gt, clk); 1363 } 1364 1365 if (total > stats->total) 1366 stats->total = total; 1367 1368 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1369 1370 return ns_to_ktime(stats->total); 1371 } 1372 1373 static void guc_enable_busyness_worker(struct intel_guc *guc) 1374 { 1375 mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); 1376 } 1377 1378 static void guc_cancel_busyness_worker(struct intel_guc *guc) 1379 { 1380 cancel_delayed_work_sync(&guc->timestamp.work); 1381 } 1382 1383 static void __reset_guc_busyness_stats(struct intel_guc *guc) 1384 { 1385 struct intel_gt *gt = guc_to_gt(guc); 1386 struct intel_engine_cs *engine; 1387 enum intel_engine_id id; 1388 unsigned long flags; 1389 ktime_t unused; 1390 1391 guc_cancel_busyness_worker(guc); 1392 1393 spin_lock_irqsave(&guc->timestamp.lock, flags); 1394 1395 guc_update_pm_timestamp(guc, &unused); 1396 for_each_engine(engine, gt, id) { 1397 struct intel_engine_guc_stats *stats = &engine->stats.guc; 1398 1399 guc_update_engine_gt_clks(engine); 1400 1401 /* 1402 * If resetting a running context, accumulate the active 1403 * time as well since there will be no context switch. 
1404 */ 1405 if (stats->running) { 1406 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; 1407 1408 stats->total_gt_clks += clk; 1409 } 1410 stats->prev_total = 0; 1411 stats->running = 0; 1412 } 1413 1414 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1415 } 1416 1417 static void __update_guc_busyness_stats(struct intel_guc *guc) 1418 { 1419 struct intel_gt *gt = guc_to_gt(guc); 1420 struct intel_engine_cs *engine; 1421 enum intel_engine_id id; 1422 unsigned long flags; 1423 ktime_t unused; 1424 1425 guc->timestamp.last_stat_jiffies = jiffies; 1426 1427 spin_lock_irqsave(&guc->timestamp.lock, flags); 1428 1429 guc_update_pm_timestamp(guc, &unused); 1430 for_each_engine(engine, gt, id) 1431 guc_update_engine_gt_clks(engine); 1432 1433 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1434 } 1435 1436 static void __guc_context_update_stats(struct intel_context *ce) 1437 { 1438 struct intel_guc *guc = ce_to_guc(ce); 1439 unsigned long flags; 1440 1441 spin_lock_irqsave(&guc->timestamp.lock, flags); 1442 lrc_update_runtime(ce); 1443 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1444 } 1445 1446 static void guc_context_update_stats(struct intel_context *ce) 1447 { 1448 if (!intel_context_pin_if_active(ce)) 1449 return; 1450 1451 __guc_context_update_stats(ce); 1452 intel_context_unpin(ce); 1453 } 1454 1455 static void guc_timestamp_ping(struct work_struct *wrk) 1456 { 1457 struct intel_guc *guc = container_of(wrk, typeof(*guc), 1458 timestamp.work.work); 1459 struct intel_uc *uc = container_of(guc, typeof(*uc), guc); 1460 struct intel_gt *gt = guc_to_gt(guc); 1461 struct intel_context *ce; 1462 intel_wakeref_t wakeref; 1463 unsigned long index; 1464 int srcu, ret; 1465 1466 /* 1467 * Ideally the busyness worker should take a gt pm wakeref because the 1468 * worker only needs to be active while gt is awake. However, the 1469 * gt_park path cancels the worker synchronously and this complicates 1470 * the flow if the worker is also running at the same time. The cancel 1471 * waits for the worker and when the worker releases the wakeref, that 1472 * would call gt_park and would lead to a deadlock. 1473 * 1474 * The resolution is to take the global pm wakeref if runtime pm is 1475 * already active. If not, we don't need to update the busyness stats as 1476 * the stats would already be updated when the gt was parked. 1477 * 1478 * Note: 1479 * - We do not requeue the worker if we cannot take a reference to runtime 1480 * pm since intel_guc_busyness_unpark would requeue the worker in the 1481 * resume path. 1482 * 1483 * - If the gt was parked longer than time taken for GT timestamp to roll 1484 * over, we ignore those rollovers since we don't care about tracking 1485 * the exact GT time. We only care about roll overs when the gt is 1486 * active and running workloads. 1487 * 1488 * - There is a window of time between gt_park and runtime suspend, 1489 * where the worker may run. This is acceptable since the worker will 1490 * not find any new data to update busyness. 1491 */ 1492 wakeref = intel_runtime_pm_get_if_active(>->i915->runtime_pm); 1493 if (!wakeref) 1494 return; 1495 1496 /* 1497 * Synchronize with gt reset to make sure the worker does not 1498 * corrupt the engine/guc stats. NB: can't actually block waiting 1499 * for a reset to complete as the reset requires flushing out 1500 * this worker thread if started. So waiting would deadlock. 
1501 */ 1502 ret = intel_gt_reset_trylock(gt, &srcu); 1503 if (ret) 1504 goto err_trylock; 1505 1506 __update_guc_busyness_stats(guc); 1507 1508 /* adjust context stats for overflow */ 1509 xa_for_each(&guc->context_lookup, index, ce) 1510 guc_context_update_stats(ce); 1511 1512 intel_gt_reset_unlock(gt, srcu); 1513 1514 guc_enable_busyness_worker(guc); 1515 1516 err_trylock: 1517 intel_runtime_pm_put(>->i915->runtime_pm, wakeref); 1518 } 1519 1520 static int guc_action_enable_usage_stats(struct intel_guc *guc) 1521 { 1522 struct intel_gt *gt = guc_to_gt(guc); 1523 struct intel_engine_cs *engine; 1524 enum intel_engine_id id; 1525 u32 offset = intel_guc_engine_usage_offset(guc); 1526 u32 action[] = { 1527 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, 1528 offset, 1529 0, 1530 }; 1531 1532 for_each_engine(engine, gt, id) 1533 __set_engine_usage_record(engine, 0, 0xffffffff, 0); 1534 1535 return intel_guc_send(guc, action, ARRAY_SIZE(action)); 1536 } 1537 1538 static int guc_init_engine_stats(struct intel_guc *guc) 1539 { 1540 struct intel_gt *gt = guc_to_gt(guc); 1541 intel_wakeref_t wakeref; 1542 int ret; 1543 1544 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) 1545 ret = guc_action_enable_usage_stats(guc); 1546 1547 if (ret) 1548 guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret)); 1549 else 1550 guc_enable_busyness_worker(guc); 1551 1552 return ret; 1553 } 1554 1555 static void guc_fini_engine_stats(struct intel_guc *guc) 1556 { 1557 guc_cancel_busyness_worker(guc); 1558 } 1559 1560 void intel_guc_busyness_park(struct intel_gt *gt) 1561 { 1562 struct intel_guc *guc = >->uc.guc; 1563 1564 if (!guc_submission_initialized(guc)) 1565 return; 1566 1567 /* 1568 * There is a race with suspend flow where the worker runs after suspend 1569 * and causes an unclaimed register access warning. Cancel the worker 1570 * synchronously here. 1571 */ 1572 guc_cancel_busyness_worker(guc); 1573 1574 /* 1575 * Before parking, we should sample engine busyness stats if we need to. 1576 * We can skip it if we are less than half a ping from the last time we 1577 * sampled the busyness stats. 
1578 */ 1579 if (guc->timestamp.last_stat_jiffies && 1580 !time_after(jiffies, guc->timestamp.last_stat_jiffies + 1581 (guc->timestamp.ping_delay / 2))) 1582 return; 1583 1584 __update_guc_busyness_stats(guc); 1585 } 1586 1587 void intel_guc_busyness_unpark(struct intel_gt *gt) 1588 { 1589 struct intel_guc *guc = >->uc.guc; 1590 unsigned long flags; 1591 ktime_t unused; 1592 1593 if (!guc_submission_initialized(guc)) 1594 return; 1595 1596 spin_lock_irqsave(&guc->timestamp.lock, flags); 1597 guc_update_pm_timestamp(guc, &unused); 1598 spin_unlock_irqrestore(&guc->timestamp.lock, flags); 1599 guc_enable_busyness_worker(guc); 1600 } 1601 1602 static inline bool 1603 submission_disabled(struct intel_guc *guc) 1604 { 1605 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1606 1607 return unlikely(!sched_engine || 1608 !__tasklet_is_enabled(&sched_engine->tasklet) || 1609 intel_gt_is_wedged(guc_to_gt(guc))); 1610 } 1611 1612 static void disable_submission(struct intel_guc *guc) 1613 { 1614 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1615 1616 if (__tasklet_is_enabled(&sched_engine->tasklet)) { 1617 GEM_BUG_ON(!guc->ct.enabled); 1618 __tasklet_disable_sync_once(&sched_engine->tasklet); 1619 sched_engine->tasklet.callback = NULL; 1620 } 1621 } 1622 1623 static void enable_submission(struct intel_guc *guc) 1624 { 1625 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1626 unsigned long flags; 1627 1628 spin_lock_irqsave(&guc->sched_engine->lock, flags); 1629 sched_engine->tasklet.callback = guc_submission_tasklet; 1630 wmb(); /* Make sure callback visible */ 1631 if (!__tasklet_is_enabled(&sched_engine->tasklet) && 1632 __tasklet_enable(&sched_engine->tasklet)) { 1633 GEM_BUG_ON(!guc->ct.enabled); 1634 1635 /* And kick in case we missed a new request submission. */ 1636 tasklet_hi_schedule(&sched_engine->tasklet); 1637 } 1638 spin_unlock_irqrestore(&guc->sched_engine->lock, flags); 1639 } 1640 1641 static void guc_flush_submissions(struct intel_guc *guc) 1642 { 1643 struct i915_sched_engine * const sched_engine = guc->sched_engine; 1644 unsigned long flags; 1645 1646 spin_lock_irqsave(&sched_engine->lock, flags); 1647 spin_unlock_irqrestore(&sched_engine->lock, flags); 1648 } 1649 1650 static void guc_flush_destroyed_contexts(struct intel_guc *guc); 1651 1652 void intel_guc_submission_reset_prepare(struct intel_guc *guc) 1653 { 1654 if (unlikely(!guc_submission_initialized(guc))) { 1655 /* Reset called during driver load? GuC not yet initialised! 
*/ 1656 return; 1657 } 1658 1659 intel_gt_park_heartbeats(guc_to_gt(guc)); 1660 disable_submission(guc); 1661 guc->interrupts.disable(guc); 1662 __reset_guc_busyness_stats(guc); 1663 1664 /* Flush IRQ handler */ 1665 spin_lock_irq(guc_to_gt(guc)->irq_lock); 1666 spin_unlock_irq(guc_to_gt(guc)->irq_lock); 1667 1668 guc_flush_submissions(guc); 1669 guc_flush_destroyed_contexts(guc); 1670 flush_work(&guc->ct.requests.worker); 1671 1672 scrub_guc_desc_for_outstanding_g2h(guc); 1673 } 1674 1675 static struct intel_engine_cs * 1676 guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling) 1677 { 1678 struct intel_engine_cs *engine; 1679 intel_engine_mask_t tmp, mask = ve->mask; 1680 unsigned int num_siblings = 0; 1681 1682 for_each_engine_masked(engine, ve->gt, mask, tmp) 1683 if (num_siblings++ == sibling) 1684 return engine; 1685 1686 return NULL; 1687 } 1688 1689 static inline struct intel_engine_cs * 1690 __context_to_physical_engine(struct intel_context *ce) 1691 { 1692 struct intel_engine_cs *engine = ce->engine; 1693 1694 if (intel_engine_is_virtual(engine)) 1695 engine = guc_virtual_get_sibling(engine, 0); 1696 1697 return engine; 1698 } 1699 1700 static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub) 1701 { 1702 struct intel_engine_cs *engine = __context_to_physical_engine(ce); 1703 1704 if (!intel_context_is_schedulable(ce)) 1705 return; 1706 1707 GEM_BUG_ON(!intel_context_is_pinned(ce)); 1708 1709 /* 1710 * We want a simple context + ring to execute the breadcrumb update. 1711 * We cannot rely on the context being intact across the GPU hang, 1712 * so clear it and rebuild just what we need for the breadcrumb. 1713 * All pending requests for this context will be zapped, and any 1714 * future request will be after userspace has had the opportunity 1715 * to recreate its own state. 1716 */ 1717 if (scrub) 1718 lrc_init_regs(ce, engine, true); 1719 1720 /* Rerun the request; its payload has been neutered (if guilty). */ 1721 lrc_update_regs(ce, engine, head); 1722 } 1723 1724 static void guc_engine_reset_prepare(struct intel_engine_cs *engine) 1725 { 1726 /* 1727 * Wa_22011802037: In addition to stopping the cs, we need 1728 * to wait for any pending mi force wakeups 1729 */ 1730 if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { 1731 intel_engine_stop_cs(engine); 1732 intel_engine_wait_for_pending_mi_fw(engine); 1733 } 1734 } 1735 1736 static void guc_reset_nop(struct intel_engine_cs *engine) 1737 { 1738 } 1739 1740 static void guc_rewind_nop(struct intel_engine_cs *engine, bool stalled) 1741 { 1742 } 1743 1744 static void 1745 __unwind_incomplete_requests(struct intel_context *ce) 1746 { 1747 struct i915_request *rq, *rn; 1748 struct list_head *pl; 1749 int prio = I915_PRIORITY_INVALID; 1750 struct i915_sched_engine * const sched_engine = 1751 ce->engine->sched_engine; 1752 unsigned long flags; 1753 1754 spin_lock_irqsave(&sched_engine->lock, flags); 1755 spin_lock(&ce->guc_state.lock); 1756 list_for_each_entry_safe_reverse(rq, rn, 1757 &ce->guc_state.requests, 1758 sched.link) { 1759 if (i915_request_completed(rq)) 1760 continue; 1761 1762 list_del_init(&rq->sched.link); 1763 __i915_request_unsubmit(rq); 1764 1765 /* Push the request back into the queue for later resubmission. 
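		 * The list is walked in reverse (newest first) and list_add()
		 * puts each request at the head of its priority list, so the
		 * original submission order is preserved for resubmission.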
*/ 1766 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 1767 if (rq_prio(rq) != prio) { 1768 prio = rq_prio(rq); 1769 pl = i915_sched_lookup_priolist(sched_engine, prio); 1770 } 1771 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); 1772 1773 list_add(&rq->sched.link, pl); 1774 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 1775 } 1776 spin_unlock(&ce->guc_state.lock); 1777 spin_unlock_irqrestore(&sched_engine->lock, flags); 1778 } 1779 1780 static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled) 1781 { 1782 bool guilty; 1783 struct i915_request *rq; 1784 unsigned long flags; 1785 u32 head; 1786 int i, number_children = ce->parallel.number_children; 1787 struct intel_context *parent = ce; 1788 1789 GEM_BUG_ON(intel_context_is_child(ce)); 1790 1791 intel_context_get(ce); 1792 1793 /* 1794 * GuC will implicitly mark the context as non-schedulable when it sends 1795 * the reset notification. Make sure our state reflects this change. The 1796 * context will be marked enabled on resubmission. 1797 */ 1798 spin_lock_irqsave(&ce->guc_state.lock, flags); 1799 clr_context_enabled(ce); 1800 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 1801 1802 /* 1803 * For each context in the relationship find the hanging request 1804 * resetting each context / request as needed 1805 */ 1806 for (i = 0; i < number_children + 1; ++i) { 1807 if (!intel_context_is_pinned(ce)) 1808 goto next_context; 1809 1810 guilty = false; 1811 rq = intel_context_get_active_request(ce); 1812 if (!rq) { 1813 head = ce->ring->tail; 1814 goto out_replay; 1815 } 1816 1817 if (i915_request_started(rq)) 1818 guilty = stalled & ce->engine->mask; 1819 1820 GEM_BUG_ON(i915_active_is_idle(&ce->active)); 1821 head = intel_ring_wrap(ce->ring, rq->head); 1822 1823 __i915_request_reset(rq, guilty); 1824 i915_request_put(rq); 1825 out_replay: 1826 guc_reset_state(ce, head, guilty); 1827 next_context: 1828 if (i != number_children) 1829 ce = list_next_entry(ce, parallel.child_link); 1830 } 1831 1832 __unwind_incomplete_requests(parent); 1833 intel_context_put(parent); 1834 } 1835 1836 void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) 1837 { 1838 struct intel_context *ce; 1839 unsigned long index; 1840 unsigned long flags; 1841 1842 if (unlikely(!guc_submission_initialized(guc))) { 1843 /* Reset called during driver load? GuC not yet initialised! */ 1844 return; 1845 } 1846 1847 xa_lock_irqsave(&guc->context_lookup, flags); 1848 xa_for_each(&guc->context_lookup, index, ce) { 1849 if (!kref_get_unless_zero(&ce->ref)) 1850 continue; 1851 1852 xa_unlock(&guc->context_lookup); 1853 1854 if (intel_context_is_pinned(ce) && 1855 !intel_context_is_child(ce)) 1856 __guc_reset_context(ce, stalled); 1857 1858 intel_context_put(ce); 1859 1860 xa_lock(&guc->context_lookup); 1861 } 1862 xa_unlock_irqrestore(&guc->context_lookup, flags); 1863 1864 /* GuC is blown away, drop all references to contexts */ 1865 xa_destroy(&guc->context_lookup); 1866 } 1867 1868 static void guc_cancel_context_requests(struct intel_context *ce) 1869 { 1870 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine; 1871 struct i915_request *rq; 1872 unsigned long flags; 1873 1874 /* Mark all executing requests as skipped. 
*/ 1875 spin_lock_irqsave(&sched_engine->lock, flags); 1876 spin_lock(&ce->guc_state.lock); 1877 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) 1878 i915_request_put(i915_request_mark_eio(rq)); 1879 spin_unlock(&ce->guc_state.lock); 1880 spin_unlock_irqrestore(&sched_engine->lock, flags); 1881 } 1882 1883 static void 1884 guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine) 1885 { 1886 struct i915_request *rq, *rn; 1887 struct rb_node *rb; 1888 unsigned long flags; 1889 1890 /* Can be called during boot if GuC fails to load */ 1891 if (!sched_engine) 1892 return; 1893 1894 /* 1895 * Before we call engine->cancel_requests(), we should have exclusive 1896 * access to the submission state. This is arranged for us by the 1897 * caller disabling the interrupt generation, the tasklet and other 1898 * threads that may then access the same state, giving us a free hand 1899 * to reset state. However, we still need to let lockdep be aware that 1900 * we know this state may be accessed in hardirq context, so we 1901 * disable the irq around this manipulation and we want to keep 1902 * the spinlock focused on its duties and not accidentally conflate 1903 * coverage to the submission's irq state. (Similarly, although we 1904 * shouldn't need to disable irq around the manipulation of the 1905 * submission's irq state, we also wish to remind ourselves that 1906 * it is irq state.) 1907 */ 1908 spin_lock_irqsave(&sched_engine->lock, flags); 1909 1910 /* Flush the queued requests to the timeline list (for retiring). */ 1911 while ((rb = rb_first_cached(&sched_engine->queue))) { 1912 struct i915_priolist *p = to_priolist(rb); 1913 1914 priolist_for_each_request_consume(rq, rn, p) { 1915 list_del_init(&rq->sched.link); 1916 1917 __i915_request_submit(rq); 1918 1919 i915_request_put(i915_request_mark_eio(rq)); 1920 } 1921 1922 rb_erase_cached(&p->node, &sched_engine->queue); 1923 i915_priolist_free(p); 1924 } 1925 1926 /* Remaining _unready_ requests will be nop'ed when submitted */ 1927 1928 sched_engine->queue_priority_hint = INT_MIN; 1929 sched_engine->queue = RB_ROOT_CACHED; 1930 1931 spin_unlock_irqrestore(&sched_engine->lock, flags); 1932 } 1933 1934 void intel_guc_submission_cancel_requests(struct intel_guc *guc) 1935 { 1936 struct intel_context *ce; 1937 unsigned long index; 1938 unsigned long flags; 1939 1940 xa_lock_irqsave(&guc->context_lookup, flags); 1941 xa_for_each(&guc->context_lookup, index, ce) { 1942 if (!kref_get_unless_zero(&ce->ref)) 1943 continue; 1944 1945 xa_unlock(&guc->context_lookup); 1946 1947 if (intel_context_is_pinned(ce) && 1948 !intel_context_is_child(ce)) 1949 guc_cancel_context_requests(ce); 1950 1951 intel_context_put(ce); 1952 1953 xa_lock(&guc->context_lookup); 1954 } 1955 xa_unlock_irqrestore(&guc->context_lookup, flags); 1956 1957 guc_cancel_sched_engine_requests(guc->sched_engine); 1958 1959 /* GuC is blown away, drop all references to contexts */ 1960 xa_destroy(&guc->context_lookup); 1961 } 1962 1963 void intel_guc_submission_reset_finish(struct intel_guc *guc) 1964 { 1965 /* Reset called during driver load or during wedge? */ 1966 if (unlikely(!guc_submission_initialized(guc) || 1967 intel_gt_is_wedged(guc_to_gt(guc)))) { 1968 return; 1969 } 1970 1971 /* 1972 * Technically possible for either of these values to be non-zero here, 1973 * but very unlikely + harmless. Regardless let's add a warn so we can 1974 * see in CI if this happens frequently / a precursor to taking down the 1975 * machine. 
1976 */ 1977 GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h)); 1978 atomic_set(&guc->outstanding_submission_g2h, 0); 1979 1980 intel_guc_global_policies_update(guc); 1981 enable_submission(guc); 1982 intel_gt_unpark_heartbeats(guc_to_gt(guc)); 1983 } 1984 1985 static void destroyed_worker_func(struct work_struct *w); 1986 static void reset_fail_worker_func(struct work_struct *w); 1987 1988 /* 1989 * Set up the memory resources to be shared with the GuC (via the GGTT) 1990 * at firmware loading time. 1991 */ 1992 int intel_guc_submission_init(struct intel_guc *guc) 1993 { 1994 struct intel_gt *gt = guc_to_gt(guc); 1995 int ret; 1996 1997 if (guc->submission_initialized) 1998 return 0; 1999 2000 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 0, 0)) { 2001 ret = guc_lrc_desc_pool_create_v69(guc); 2002 if (ret) 2003 return ret; 2004 } 2005 2006 guc->submission_state.guc_ids_bitmap = 2007 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); 2008 if (!guc->submission_state.guc_ids_bitmap) { 2009 ret = -ENOMEM; 2010 goto destroy_pool; 2011 } 2012 2013 guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; 2014 guc->timestamp.shift = gpm_timestamp_shift(gt); 2015 guc->submission_initialized = true; 2016 2017 return 0; 2018 2019 destroy_pool: 2020 guc_lrc_desc_pool_destroy_v69(guc); 2021 2022 return ret; 2023 } 2024 2025 void intel_guc_submission_fini(struct intel_guc *guc) 2026 { 2027 if (!guc->submission_initialized) 2028 return; 2029 2030 guc_flush_destroyed_contexts(guc); 2031 guc_lrc_desc_pool_destroy_v69(guc); 2032 i915_sched_engine_put(guc->sched_engine); 2033 bitmap_free(guc->submission_state.guc_ids_bitmap); 2034 guc->submission_initialized = false; 2035 } 2036 2037 static inline void queue_request(struct i915_sched_engine *sched_engine, 2038 struct i915_request *rq, 2039 int prio) 2040 { 2041 GEM_BUG_ON(!list_empty(&rq->sched.link)); 2042 list_add_tail(&rq->sched.link, 2043 i915_sched_lookup_priolist(sched_engine, prio)); 2044 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 2045 tasklet_hi_schedule(&sched_engine->tasklet); 2046 } 2047 2048 static int guc_bypass_tasklet_submit(struct intel_guc *guc, 2049 struct i915_request *rq) 2050 { 2051 int ret = 0; 2052 2053 __i915_request_submit(rq); 2054 2055 trace_i915_request_in(rq, 0); 2056 2057 if (is_multi_lrc_rq(rq)) { 2058 if (multi_lrc_submit(rq)) { 2059 ret = guc_wq_item_append(guc, rq); 2060 if (!ret) 2061 ret = guc_add_request(guc, rq); 2062 } 2063 } else { 2064 guc_set_lrc_tail(rq); 2065 ret = guc_add_request(guc, rq); 2066 } 2067 2068 if (unlikely(ret == -EPIPE)) 2069 disable_submission(guc); 2070 2071 return ret; 2072 } 2073 2074 static bool need_tasklet(struct intel_guc *guc, struct i915_request *rq) 2075 { 2076 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2077 struct intel_context *ce = request_to_scheduling_context(rq); 2078 2079 return submission_disabled(guc) || guc->stalled_request || 2080 !i915_sched_engine_is_empty(sched_engine) || 2081 !ctx_id_mapped(guc, ce->guc_id.id); 2082 } 2083 2084 static void guc_submit_request(struct i915_request *rq) 2085 { 2086 struct i915_sched_engine *sched_engine = rq->engine->sched_engine; 2087 struct intel_guc *guc = &rq->engine->gt->uc.guc; 2088 unsigned long flags; 2089 2090 /* Will be called from irq-context when using foreign fences. 
*/ 2091 spin_lock_irqsave(&sched_engine->lock, flags); 2092 2093 if (need_tasklet(guc, rq)) 2094 queue_request(sched_engine, rq, rq_prio(rq)); 2095 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY) 2096 tasklet_hi_schedule(&sched_engine->tasklet); 2097 2098 spin_unlock_irqrestore(&sched_engine->lock, flags); 2099 } 2100 2101 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) 2102 { 2103 int ret; 2104 2105 GEM_BUG_ON(intel_context_is_child(ce)); 2106 2107 if (intel_context_is_parent(ce)) 2108 ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, 2109 NUMBER_MULTI_LRC_GUC_ID(guc), 2110 order_base_2(ce->parallel.number_children 2111 + 1)); 2112 else 2113 ret = ida_simple_get(&guc->submission_state.guc_ids, 2114 NUMBER_MULTI_LRC_GUC_ID(guc), 2115 guc->submission_state.num_guc_ids, 2116 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 2117 __GFP_NOWARN); 2118 if (unlikely(ret < 0)) 2119 return ret; 2120 2121 if (!intel_context_is_parent(ce)) 2122 ++guc->submission_state.guc_ids_in_use; 2123 2124 ce->guc_id.id = ret; 2125 return 0; 2126 } 2127 2128 static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2129 { 2130 GEM_BUG_ON(intel_context_is_child(ce)); 2131 2132 if (!context_guc_id_invalid(ce)) { 2133 if (intel_context_is_parent(ce)) { 2134 bitmap_release_region(guc->submission_state.guc_ids_bitmap, 2135 ce->guc_id.id, 2136 order_base_2(ce->parallel.number_children 2137 + 1)); 2138 } else { 2139 --guc->submission_state.guc_ids_in_use; 2140 ida_simple_remove(&guc->submission_state.guc_ids, 2141 ce->guc_id.id); 2142 } 2143 clr_ctx_id_mapping(guc, ce->guc_id.id); 2144 set_context_guc_id_invalid(ce); 2145 } 2146 if (!list_empty(&ce->guc_id.link)) 2147 list_del_init(&ce->guc_id.link); 2148 } 2149 2150 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) 2151 { 2152 unsigned long flags; 2153 2154 spin_lock_irqsave(&guc->submission_state.lock, flags); 2155 __release_guc_id(guc, ce); 2156 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 2157 } 2158 2159 static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) 2160 { 2161 struct intel_context *cn; 2162 2163 lockdep_assert_held(&guc->submission_state.lock); 2164 GEM_BUG_ON(intel_context_is_child(ce)); 2165 GEM_BUG_ON(intel_context_is_parent(ce)); 2166 2167 if (!list_empty(&guc->submission_state.guc_id_list)) { 2168 cn = list_first_entry(&guc->submission_state.guc_id_list, 2169 struct intel_context, 2170 guc_id.link); 2171 2172 GEM_BUG_ON(atomic_read(&cn->guc_id.ref)); 2173 GEM_BUG_ON(context_guc_id_invalid(cn)); 2174 GEM_BUG_ON(intel_context_is_child(cn)); 2175 GEM_BUG_ON(intel_context_is_parent(cn)); 2176 2177 list_del_init(&cn->guc_id.link); 2178 ce->guc_id.id = cn->guc_id.id; 2179 2180 spin_lock(&cn->guc_state.lock); 2181 clr_context_registered(cn); 2182 spin_unlock(&cn->guc_state.lock); 2183 2184 set_context_guc_id_invalid(cn); 2185 2186 #ifdef CONFIG_DRM_I915_SELFTEST 2187 guc->number_guc_id_stolen++; 2188 #endif 2189 2190 return 0; 2191 } else { 2192 return -EAGAIN; 2193 } 2194 } 2195 2196 static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce) 2197 { 2198 int ret; 2199 2200 lockdep_assert_held(&guc->submission_state.lock); 2201 GEM_BUG_ON(intel_context_is_child(ce)); 2202 2203 ret = new_guc_id(guc, ce); 2204 if (unlikely(ret < 0)) { 2205 if (intel_context_is_parent(ce)) 2206 return -ENOSPC; 2207 2208 ret = steal_guc_id(guc, ce); 2209 if (ret < 0) 2210 return ret; 2211 } 2212 2213 if (intel_context_is_parent(ce)) { 2214 struct 
intel_context *child;
		int i = 1;

		for_each_child(ce, child)
			child->guc_id.id = ce->guc_id.id + i++;
	}

	return 0;
}

#define PIN_GUC_ID_TRIES	4
static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
	int ret = 0;
	unsigned long flags, tries = PIN_GUC_ID_TRIES;

	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));

try_again:
	spin_lock_irqsave(&guc->submission_state.lock, flags);

	might_lock(&ce->guc_state.lock);

	if (context_guc_id_invalid(ce)) {
		ret = assign_guc_id(guc, ce);
		if (ret)
			goto out_unlock;
		ret = 1;	/* Indicates newly assigned guc_id */
	}
	if (!list_empty(&ce->guc_id.link))
		list_del_init(&ce->guc_id.link);
	atomic_inc(&ce->guc_id.ref);

out_unlock:
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);

	/*
	 * -EAGAIN indicates no guc_ids are available, let's retire any
	 * outstanding requests to see if that frees up a guc_id. If the first
	 * retire didn't help, insert a sleep with the timeslice duration before
	 * attempting to retire more requests. Double the sleep period each
	 * subsequent pass before finally giving up. The sleep period has a
	 * maximum of 100ms and a minimum of 1ms.
	 */
	if (ret == -EAGAIN && --tries) {
		if (PIN_GUC_ID_TRIES - tries > 1) {
			unsigned int timeslice_shifted =
				ce->engine->props.timeslice_duration_ms <<
				(PIN_GUC_ID_TRIES - tries - 2);
			unsigned int max = min_t(unsigned int, 100,
						 timeslice_shifted);

			drm_msleep(max_t(unsigned int, max, 1));
		}
		intel_gt_retire_requests(guc_to_gt(guc));
		goto try_again;
	}

	return ret;
}

static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
{
	unsigned long flags;

	GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0);
	GEM_BUG_ON(intel_context_is_child(ce));

	if (unlikely(context_guc_id_invalid(ce) ||
		     intel_context_is_parent(ce)))
		return;

	spin_lock_irqsave(&guc->submission_state.lock, flags);
	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
	    !atomic_read(&ce->guc_id.ref))
		list_add_tail(&ce->guc_id.link,
			      &guc->submission_state.guc_id_list);
	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}

static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
					       struct intel_context *ce,
					       u32 guc_id,
					       u32 offset,
					       bool loop)
{
	struct intel_context *child;
	u32 action[4 + MAX_ENGINE_INSTANCE];
	int len = 0;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = guc_id;
	action[len++] = ce->parallel.number_children + 1;
	action[len++] = offset;
	for_each_child(ce, child) {
		offset += sizeof(struct guc_lrc_desc_v69);
		action[len++] = offset;
	}

	return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}

static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
					       struct intel_context *ce,
					       struct guc_ctxt_registration_info *info,
					       bool loop)
{
	struct intel_context *child;
	u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
	int len = 0;
	u32 next_id;

	GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);

	action[len++] =
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 2331 action[len++] = info->flags; 2332 action[len++] = info->context_idx; 2333 action[len++] = info->engine_class; 2334 action[len++] = info->engine_submit_mask; 2335 action[len++] = info->wq_desc_lo; 2336 action[len++] = info->wq_desc_hi; 2337 action[len++] = info->wq_base_lo; 2338 action[len++] = info->wq_base_hi; 2339 action[len++] = info->wq_size; 2340 action[len++] = ce->parallel.number_children + 1; 2341 action[len++] = info->hwlrca_lo; 2342 action[len++] = info->hwlrca_hi; 2343 2344 next_id = info->context_idx + 1; 2345 for_each_child(ce, child) { 2346 GEM_BUG_ON(next_id++ != child->guc_id.id); 2347 2348 /* 2349 * NB: GuC interface supports 64 bit LRCA even though i915/HW 2350 * only supports 32 bit currently. 2351 */ 2352 action[len++] = lower_32_bits(child->lrc.lrca); 2353 action[len++] = upper_32_bits(child->lrc.lrca); 2354 } 2355 2356 GEM_BUG_ON(len > ARRAY_SIZE(action)); 2357 2358 return guc_submission_send_busy_loop(guc, action, len, 0, loop); 2359 } 2360 2361 static int __guc_action_register_context_v69(struct intel_guc *guc, 2362 u32 guc_id, 2363 u32 offset, 2364 bool loop) 2365 { 2366 u32 action[] = { 2367 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2368 guc_id, 2369 offset, 2370 }; 2371 2372 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2373 0, loop); 2374 } 2375 2376 static int __guc_action_register_context_v70(struct intel_guc *guc, 2377 struct guc_ctxt_registration_info *info, 2378 bool loop) 2379 { 2380 u32 action[] = { 2381 INTEL_GUC_ACTION_REGISTER_CONTEXT, 2382 info->flags, 2383 info->context_idx, 2384 info->engine_class, 2385 info->engine_submit_mask, 2386 info->wq_desc_lo, 2387 info->wq_desc_hi, 2388 info->wq_base_lo, 2389 info->wq_base_hi, 2390 info->wq_size, 2391 info->hwlrca_lo, 2392 info->hwlrca_hi, 2393 }; 2394 2395 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2396 0, loop); 2397 } 2398 2399 static void prepare_context_registration_info_v69(struct intel_context *ce); 2400 static void prepare_context_registration_info_v70(struct intel_context *ce, 2401 struct guc_ctxt_registration_info *info); 2402 2403 static int 2404 register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) 2405 { 2406 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + 2407 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); 2408 2409 prepare_context_registration_info_v69(ce); 2410 2411 if (intel_context_is_parent(ce)) 2412 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, 2413 offset, loop); 2414 else 2415 return __guc_action_register_context_v69(guc, ce->guc_id.id, 2416 offset, loop); 2417 } 2418 2419 static int 2420 register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) 2421 { 2422 struct guc_ctxt_registration_info info; 2423 2424 prepare_context_registration_info_v70(ce, &info); 2425 2426 if (intel_context_is_parent(ce)) 2427 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); 2428 else 2429 return __guc_action_register_context_v70(guc, &info, loop); 2430 } 2431 2432 static int register_context(struct intel_context *ce, bool loop) 2433 { 2434 struct intel_guc *guc = ce_to_guc(ce); 2435 int ret; 2436 2437 GEM_BUG_ON(intel_context_is_child(ce)); 2438 trace_intel_context_register(ce); 2439 2440 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2441 ret = register_context_v70(guc, ce, loop); 2442 else 2443 ret = register_context_v69(guc, ce, loop); 2444 2445 if (likely(!ret)) { 2446 unsigned long flags; 2447 2448 
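		/*
		 * Registration succeeded: mark the context as registered under
		 * ce->guc_state.lock so the reset/teardown paths observe a
		 * consistent state. For GuC v70+ the scheduling policies
		 * (priority, execution quantum, preemption timeout) are pushed
		 * in a separate H2G right after this, see
		 * guc_context_policy_init_v70().
		 */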
spin_lock_irqsave(&ce->guc_state.lock, flags); 2449 set_context_registered(ce); 2450 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2451 2452 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) 2453 guc_context_policy_init_v70(ce, loop); 2454 } 2455 2456 return ret; 2457 } 2458 2459 static int __guc_action_deregister_context(struct intel_guc *guc, 2460 u32 guc_id) 2461 { 2462 u32 action[] = { 2463 INTEL_GUC_ACTION_DEREGISTER_CONTEXT, 2464 guc_id, 2465 }; 2466 2467 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 2468 G2H_LEN_DW_DEREGISTER_CONTEXT, 2469 true); 2470 } 2471 2472 static int deregister_context(struct intel_context *ce, u32 guc_id) 2473 { 2474 struct intel_guc *guc = ce_to_guc(ce); 2475 2476 GEM_BUG_ON(intel_context_is_child(ce)); 2477 trace_intel_context_deregister(ce); 2478 2479 return __guc_action_deregister_context(guc, guc_id); 2480 } 2481 2482 static inline void clear_children_join_go_memory(struct intel_context *ce) 2483 { 2484 struct parent_scratch *ps = __get_parent_scratch(ce); 2485 int i; 2486 2487 ps->go.semaphore = 0; 2488 for (i = 0; i < ce->parallel.number_children + 1; ++i) 2489 ps->join[i].semaphore = 0; 2490 } 2491 2492 static inline u32 get_children_go_value(struct intel_context *ce) 2493 { 2494 return __get_parent_scratch(ce)->go.semaphore; 2495 } 2496 2497 static inline u32 get_children_join_value(struct intel_context *ce, 2498 u8 child_index) 2499 { 2500 return __get_parent_scratch(ce)->join[child_index].semaphore; 2501 } 2502 2503 struct context_policy { 2504 u32 count; 2505 struct guc_update_context_policy h2g; 2506 }; 2507 2508 static u32 __guc_context_policy_action_size(struct context_policy *policy) 2509 { 2510 size_t bytes = sizeof(policy->h2g.header) + 2511 (sizeof(policy->h2g.klv[0]) * policy->count); 2512 2513 return bytes / sizeof(u32); 2514 } 2515 2516 static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id) 2517 { 2518 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; 2519 policy->h2g.header.ctx_id = guc_id; 2520 policy->count = 0; 2521 } 2522 2523 #define MAKE_CONTEXT_POLICY_ADD(func, id) \ 2524 static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \ 2525 { \ 2526 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ 2527 policy->h2g.klv[policy->count].kl = \ 2528 FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ 2529 FIELD_PREP(GUC_KLV_0_LEN, 1); \ 2530 policy->h2g.klv[policy->count].value = data; \ 2531 policy->count++; \ 2532 } 2533 2534 MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) 2535 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 2536 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY) 2537 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY) 2538 2539 #undef MAKE_CONTEXT_POLICY_ADD 2540 2541 static int __guc_context_set_context_policies(struct intel_guc *guc, 2542 struct context_policy *policy, 2543 bool loop) 2544 { 2545 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g, 2546 __guc_context_policy_action_size(policy), 2547 0, loop); 2548 } 2549 2550 static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) 2551 { 2552 struct intel_engine_cs *engine = ce->engine; 2553 struct intel_guc *guc = &engine->gt->uc.guc; 2554 struct context_policy policy; 2555 u32 execution_quantum; 2556 u32 preemption_timeout; 2557 unsigned long flags; 2558 int ret; 2559 2560 /* NB: For both of these, zero means disabled. 
 */
	GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
				  execution_quantum));
	GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
				  preemption_timeout));
	execution_quantum = engine->props.timeslice_duration_ms * 1000;
	preemption_timeout = engine->props.preempt_timeout_ms * 1000;

	__guc_context_policy_start_klv(&policy, ce->guc_id.id);

	__guc_context_policy_add_priority(&policy, ce->guc_state.prio);
	__guc_context_policy_add_execution_quantum(&policy, execution_quantum);
	__guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);

	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
		__guc_context_policy_add_preempt_to_idle(&policy, 1);

	ret = __guc_context_set_context_policies(guc, &policy, loop);

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (ret != 0)
		set_context_policy_required(ce);
	else
		clr_context_policy_required(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	return ret;
}

static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
					struct guc_lrc_desc_v69 *desc)
{
	desc->policy_flags = 0;

	if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
		desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;

	/* NB: For both of these, zero means disabled. */
	GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
				  desc->execution_quantum));
	GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
				  desc->preemption_timeout));
	desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
	desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}

static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
{
	/*
	 * this matches the mapping we do in map_i915_prio_to_guc_prio()
	 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
	 */
	switch (prio) {
	default:
		MISSING_CASE(prio);
		fallthrough;
	case GUC_CLIENT_PRIORITY_KMD_NORMAL:
		return GEN12_CTX_PRIORITY_NORMAL;
	case GUC_CLIENT_PRIORITY_NORMAL:
		return GEN12_CTX_PRIORITY_LOW;
	case GUC_CLIENT_PRIORITY_HIGH:
	case GUC_CLIENT_PRIORITY_KMD_HIGH:
		return GEN12_CTX_PRIORITY_HIGH;
	}
}

static void prepare_context_registration_info_v69(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 ctx_id = ce->guc_id.id;
	struct guc_lrc_desc_v69 *desc;
	struct intel_context *child;

	GEM_BUG_ON(!engine->mask);

	/*
	 * Ensure LRC + CT vmas are in the same region as the write barrier is
	 * done based on the CT vma region.
	 */
	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
		   i915_gem_object_is_lmem(ce->ring->vma->obj));

	desc = __get_lrc_desc_v69(guc, ctx_id);
	GEM_BUG_ON(!desc);
	desc->engine_class = engine_class_to_guc_class(engine->class);
	desc->engine_submit_mask = engine->logical_mask;
	desc->hw_context_desc = ce->lrc.lrca;
	desc->priority = ce->guc_state.prio;
	desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
	guc_context_policy_init_v69(engine, desc);

	/*
	 * If context is a parent, we need to register a process descriptor
	 * describing a work queue and register all child contexts.
	 */
	if (intel_context_is_parent(ce)) {
		struct guc_process_desc_v69 *pdesc;

		ce->parallel.guc.wqi_tail = 0;
		ce->parallel.guc.wqi_head = 0;

		desc->process_desc = i915_ggtt_offset(ce->state) +
				     __get_parent_scratch_offset(ce);
		desc->wq_addr = i915_ggtt_offset(ce->state) +
				__get_wq_offset(ce);
		desc->wq_size = WQ_SIZE;

		pdesc = __get_process_desc_v69(ce);
		memset(pdesc, 0, sizeof(*(pdesc)));
		pdesc->stage_id = ce->guc_id.id;
		pdesc->wq_base_addr = desc->wq_addr;
		pdesc->wq_size_bytes = desc->wq_size;
		pdesc->wq_status = WQ_STATUS_ACTIVE;

		ce->parallel.guc.wq_head = &pdesc->head;
		ce->parallel.guc.wq_tail = &pdesc->tail;
		ce->parallel.guc.wq_status = &pdesc->wq_status;

		for_each_child(ce, child) {
			desc = __get_lrc_desc_v69(guc, child->guc_id.id);

			desc->engine_class =
				engine_class_to_guc_class(engine->class);
			desc->hw_context_desc = child->lrc.lrca;
			desc->priority = ce->guc_state.prio;
			desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
			guc_context_policy_init_v69(engine, desc);
		}

		clear_children_join_go_memory(ce);
	}
}

static void prepare_context_registration_info_v70(struct intel_context *ce,
						  struct guc_ctxt_registration_info *info)
{
	struct intel_engine_cs *engine = ce->engine;
	struct intel_guc *guc = &engine->gt->uc.guc;
	u32 ctx_id = ce->guc_id.id;

	GEM_BUG_ON(!engine->mask);

	/*
	 * Ensure LRC + CT vmas are in the same region as the write barrier is
	 * done based on the CT vma region.
	 */
	GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
		   i915_gem_object_is_lmem(ce->ring->vma->obj));

	memset(info, 0, sizeof(*info));
	info->context_idx = ctx_id;
	info->engine_class = engine_class_to_guc_class(engine->class);
	info->engine_submit_mask = engine->logical_mask;
	/*
	 * NB: GuC interface supports 64 bit LRCA even though i915/HW
	 * only supports 32 bit currently.
	 */
	info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
	info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
	if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
		info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
	info->flags = CONTEXT_REGISTRATION_FLAG_KMD;

	/*
	 * If context is a parent, we need to register a process descriptor
	 * describing a work queue and register all child contexts.
2727 */ 2728 if (intel_context_is_parent(ce)) { 2729 struct guc_sched_wq_desc *wq_desc; 2730 u64 wq_desc_offset, wq_base_offset; 2731 2732 ce->parallel.guc.wqi_tail = 0; 2733 ce->parallel.guc.wqi_head = 0; 2734 2735 wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + 2736 __get_parent_scratch_offset(ce); 2737 wq_base_offset = (u64)i915_ggtt_offset(ce->state) + 2738 __get_wq_offset(ce); 2739 info->wq_desc_lo = lower_32_bits(wq_desc_offset); 2740 info->wq_desc_hi = upper_32_bits(wq_desc_offset); 2741 info->wq_base_lo = lower_32_bits(wq_base_offset); 2742 info->wq_base_hi = upper_32_bits(wq_base_offset); 2743 info->wq_size = WQ_SIZE; 2744 2745 wq_desc = __get_wq_desc_v70(ce); 2746 memset(wq_desc, 0, sizeof(*wq_desc)); 2747 wq_desc->wq_status = WQ_STATUS_ACTIVE; 2748 2749 ce->parallel.guc.wq_head = &wq_desc->head; 2750 ce->parallel.guc.wq_tail = &wq_desc->tail; 2751 ce->parallel.guc.wq_status = &wq_desc->wq_status; 2752 2753 clear_children_join_go_memory(ce); 2754 } 2755 } 2756 2757 static int try_context_registration(struct intel_context *ce, bool loop) 2758 { 2759 struct intel_engine_cs *engine = ce->engine; 2760 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; 2761 struct intel_guc *guc = &engine->gt->uc.guc; 2762 intel_wakeref_t wakeref; 2763 u32 ctx_id = ce->guc_id.id; 2764 bool context_registered; 2765 int ret = 0; 2766 2767 GEM_BUG_ON(!sched_state_is_init(ce)); 2768 2769 context_registered = ctx_id_mapped(guc, ctx_id); 2770 2771 clr_ctx_id_mapping(guc, ctx_id); 2772 set_ctx_id_mapping(guc, ctx_id, ce); 2773 2774 /* 2775 * The context_lookup xarray is used to determine if the hardware 2776 * context is currently registered. There are two cases in which it 2777 * could be registered either the guc_id has been stolen from another 2778 * context or the lrc descriptor address of this context has changed. In 2779 * either case the context needs to be deregistered with the GuC before 2780 * registering this context. 2781 */ 2782 if (context_registered) { 2783 bool disabled; 2784 unsigned long flags; 2785 2786 trace_intel_context_steal_guc_id(ce); 2787 GEM_BUG_ON(!loop); 2788 2789 /* Seal race with Reset */ 2790 spin_lock_irqsave(&ce->guc_state.lock, flags); 2791 disabled = submission_disabled(guc); 2792 if (likely(!disabled)) { 2793 set_context_wait_for_deregister_to_register(ce); 2794 intel_context_get(ce); 2795 } 2796 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2797 if (unlikely(disabled)) { 2798 clr_ctx_id_mapping(guc, ctx_id); 2799 return 0; /* Will get registered later */ 2800 } 2801 2802 /* 2803 * If stealing the guc_id, this ce has the same guc_id as the 2804 * context whose guc_id was stolen. 
		 */
		with_intel_runtime_pm(runtime_pm, wakeref)
			ret = deregister_context(ce, ce->guc_id.id);
		if (unlikely(ret == -ENODEV))
			ret = 0;	/* Will get registered later */
	} else {
		with_intel_runtime_pm(runtime_pm, wakeref)
			ret = register_context(ce, loop);
		if (unlikely(ret == -EBUSY)) {
			clr_ctx_id_mapping(guc, ctx_id);
		} else if (unlikely(ret == -ENODEV)) {
			clr_ctx_id_mapping(guc, ctx_id);
			ret = 0;	/* Will get registered later */
		}
	}

	return ret;
}

static int __guc_context_pre_pin(struct intel_context *ce,
				 struct intel_engine_cs *engine,
				 struct i915_gem_ww_ctx *ww,
				 void **vaddr)
{
	return lrc_pre_pin(ce, engine, ww, vaddr);
}

static int __guc_context_pin(struct intel_context *ce,
			     struct intel_engine_cs *engine,
			     void *vaddr)
{
	if (i915_ggtt_offset(ce->state) !=
	    (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
		set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);

	/*
	 * GuC context gets pinned in guc_request_alloc. See that function for
	 * explanation of why.
	 */

	return lrc_pin(ce, engine, vaddr);
}

static int guc_context_pre_pin(struct intel_context *ce,
			       struct i915_gem_ww_ctx *ww,
			       void **vaddr)
{
	return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
}

static int guc_context_pin(struct intel_context *ce, void *vaddr)
{
	int ret = __guc_context_pin(ce, ce->engine, vaddr);

	if (likely(!ret && !intel_context_is_barrier(ce)))
		intel_engine_pm_get(ce->engine);

	return ret;
}

static void guc_context_unpin(struct intel_context *ce)
{
	struct intel_guc *guc = ce_to_guc(ce);

	__guc_context_update_stats(ce);
	unpin_guc_id(guc, ce);
	lrc_unpin(ce);

	if (likely(!intel_context_is_barrier(ce)))
		intel_engine_pm_put_async(ce->engine);
}

static void guc_context_post_unpin(struct intel_context *ce)
{
	lrc_post_unpin(ce);
}

static void __guc_context_sched_enable(struct intel_guc *guc,
				       struct intel_context *ce)
{
	u32 action[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
		ce->guc_id.id,
		GUC_CONTEXT_ENABLE
	};

	trace_intel_context_sched_enable(ce);

	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
}

static void __guc_context_sched_disable(struct intel_guc *guc,
					struct intel_context *ce,
					u16 guc_id)
{
	u32 action[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
		guc_id,	/* ce->guc_id.id not stable */
		GUC_CONTEXT_DISABLE
	};

	GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID);

	GEM_BUG_ON(intel_context_is_child(ce));
	trace_intel_context_sched_disable(ce);

	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
}

static void guc_blocked_fence_complete(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);

	if (!i915_sw_fence_done(&ce->guc_state.blocked))
		i915_sw_fence_complete(&ce->guc_state.blocked);
}

static void guc_blocked_fence_reinit(struct intel_context *ce)
{
	lockdep_assert_held(&ce->guc_state.lock);
	GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked));

	/*
	 * This fence is always complete unless a pending schedule disable is
* outstanding. We arm the fence here and complete it when we receive 2932 * the pending schedule disable complete message. 2933 */ 2934 i915_sw_fence_fini(&ce->guc_state.blocked); 2935 i915_sw_fence_reinit(&ce->guc_state.blocked); 2936 i915_sw_fence_await(&ce->guc_state.blocked); 2937 i915_sw_fence_commit(&ce->guc_state.blocked); 2938 } 2939 2940 static u16 prep_context_pending_disable(struct intel_context *ce) 2941 { 2942 lockdep_assert_held(&ce->guc_state.lock); 2943 2944 set_context_pending_disable(ce); 2945 clr_context_enabled(ce); 2946 guc_blocked_fence_reinit(ce); 2947 intel_context_get(ce); 2948 2949 return ce->guc_id.id; 2950 } 2951 2952 static struct i915_sw_fence *guc_context_block(struct intel_context *ce) 2953 { 2954 struct intel_guc *guc = ce_to_guc(ce); 2955 unsigned long flags; 2956 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 2957 intel_wakeref_t wakeref; 2958 u16 guc_id; 2959 bool enabled; 2960 2961 GEM_BUG_ON(intel_context_is_child(ce)); 2962 2963 spin_lock_irqsave(&ce->guc_state.lock, flags); 2964 2965 incr_context_blocked(ce); 2966 2967 enabled = context_enabled(ce); 2968 if (unlikely(!enabled || submission_disabled(guc))) { 2969 if (enabled) 2970 clr_context_enabled(ce); 2971 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2972 return &ce->guc_state.blocked; 2973 } 2974 2975 /* 2976 * We add +2 here as the schedule disable complete CTB handler calls 2977 * intel_context_sched_disable_unpin (-2 to pin_count). 2978 */ 2979 atomic_add(2, &ce->pin_count); 2980 2981 guc_id = prep_context_pending_disable(ce); 2982 2983 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 2984 2985 with_intel_runtime_pm(runtime_pm, wakeref) 2986 __guc_context_sched_disable(guc, ce, guc_id); 2987 2988 return &ce->guc_state.blocked; 2989 } 2990 2991 #define SCHED_STATE_MULTI_BLOCKED_MASK \ 2992 (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) 2993 #define SCHED_STATE_NO_UNBLOCK \ 2994 (SCHED_STATE_MULTI_BLOCKED_MASK | \ 2995 SCHED_STATE_PENDING_DISABLE | \ 2996 SCHED_STATE_BANNED) 2997 2998 static bool context_cant_unblock(struct intel_context *ce) 2999 { 3000 lockdep_assert_held(&ce->guc_state.lock); 3001 3002 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || 3003 context_guc_id_invalid(ce) || 3004 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) || 3005 !intel_context_is_pinned(ce); 3006 } 3007 3008 static void guc_context_unblock(struct intel_context *ce) 3009 { 3010 struct intel_guc *guc = ce_to_guc(ce); 3011 unsigned long flags; 3012 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; 3013 intel_wakeref_t wakeref; 3014 bool enable; 3015 3016 GEM_BUG_ON(context_enabled(ce)); 3017 GEM_BUG_ON(intel_context_is_child(ce)); 3018 3019 spin_lock_irqsave(&ce->guc_state.lock, flags); 3020 3021 if (unlikely(submission_disabled(guc) || 3022 context_cant_unblock(ce))) { 3023 enable = false; 3024 } else { 3025 enable = true; 3026 set_context_pending_enable(ce); 3027 set_context_enabled(ce); 3028 intel_context_get(ce); 3029 } 3030 3031 decr_context_blocked(ce); 3032 3033 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3034 3035 if (enable) { 3036 with_intel_runtime_pm(runtime_pm, wakeref) 3037 __guc_context_sched_enable(guc, ce); 3038 } 3039 } 3040 3041 static void guc_context_cancel_request(struct intel_context *ce, 3042 struct i915_request *rq) 3043 { 3044 struct intel_context *block_context = 3045 request_to_scheduling_context(rq); 3046 3047 if (i915_sw_fence_signaled(&rq->submit)) { 3048 struct i915_sw_fence *fence; 3049 3050 intel_context_get(ce); 3051 
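		/*
		 * Block the context first (schedule disable + blocked fence),
		 * wait on that fence, and only then neuter the request payload
		 * and scrub/rebuild the context state at its head before
		 * unblocking the context again.
		 */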
fence = guc_context_block(block_context); 3052 i915_sw_fence_wait(fence); 3053 if (!i915_request_completed(rq)) { 3054 __i915_request_skip(rq); 3055 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), 3056 true); 3057 } 3058 3059 guc_context_unblock(block_context); 3060 intel_context_put(ce); 3061 } 3062 } 3063 3064 static void __guc_context_set_preemption_timeout(struct intel_guc *guc, 3065 u16 guc_id, 3066 u32 preemption_timeout) 3067 { 3068 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3069 struct context_policy policy; 3070 3071 __guc_context_policy_start_klv(&policy, guc_id); 3072 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); 3073 __guc_context_set_context_policies(guc, &policy, true); 3074 } else { 3075 u32 action[] = { 3076 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, 3077 guc_id, 3078 preemption_timeout 3079 }; 3080 3081 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3082 } 3083 } 3084 3085 static void 3086 guc_context_revoke(struct intel_context *ce, struct i915_request *rq, 3087 unsigned int preempt_timeout_ms) 3088 { 3089 struct intel_guc *guc = ce_to_guc(ce); 3090 struct intel_runtime_pm *runtime_pm = 3091 &ce->engine->gt->i915->runtime_pm; 3092 intel_wakeref_t wakeref; 3093 unsigned long flags; 3094 3095 GEM_BUG_ON(intel_context_is_child(ce)); 3096 3097 guc_flush_submissions(guc); 3098 3099 spin_lock_irqsave(&ce->guc_state.lock, flags); 3100 set_context_banned(ce); 3101 3102 if (submission_disabled(guc) || 3103 (!context_enabled(ce) && !context_pending_disable(ce))) { 3104 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3105 3106 guc_cancel_context_requests(ce); 3107 intel_engine_signal_breadcrumbs(ce->engine); 3108 } else if (!context_pending_disable(ce)) { 3109 u16 guc_id; 3110 3111 /* 3112 * We add +2 here as the schedule disable complete CTB handler 3113 * calls intel_context_sched_disable_unpin (-2 to pin_count). 3114 */ 3115 atomic_add(2, &ce->pin_count); 3116 3117 guc_id = prep_context_pending_disable(ce); 3118 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3119 3120 /* 3121 * In addition to disabling scheduling, set the preemption 3122 * timeout to the minimum value (1 us) so the banned context 3123 * gets kicked off the HW ASAP. 
3124 */ 3125 with_intel_runtime_pm(runtime_pm, wakeref) { 3126 __guc_context_set_preemption_timeout(guc, guc_id, 3127 preempt_timeout_ms); 3128 __guc_context_sched_disable(guc, ce, guc_id); 3129 } 3130 } else { 3131 if (!context_guc_id_invalid(ce)) 3132 with_intel_runtime_pm(runtime_pm, wakeref) 3133 __guc_context_set_preemption_timeout(guc, 3134 ce->guc_id.id, 3135 preempt_timeout_ms); 3136 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3137 } 3138 } 3139 3140 static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce, 3141 unsigned long flags) 3142 __releases(ce->guc_state.lock) 3143 { 3144 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; 3145 intel_wakeref_t wakeref; 3146 u16 guc_id; 3147 3148 lockdep_assert_held(&ce->guc_state.lock); 3149 guc_id = prep_context_pending_disable(ce); 3150 3151 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3152 3153 with_intel_runtime_pm(runtime_pm, wakeref) 3154 __guc_context_sched_disable(guc, ce, guc_id); 3155 } 3156 3157 static bool bypass_sched_disable(struct intel_guc *guc, 3158 struct intel_context *ce) 3159 { 3160 lockdep_assert_held(&ce->guc_state.lock); 3161 GEM_BUG_ON(intel_context_is_child(ce)); 3162 3163 if (submission_disabled(guc) || context_guc_id_invalid(ce) || 3164 !ctx_id_mapped(guc, ce->guc_id.id)) { 3165 clr_context_enabled(ce); 3166 return true; 3167 } 3168 3169 return !context_enabled(ce); 3170 } 3171 3172 static void __delay_sched_disable(struct work_struct *wrk) 3173 { 3174 struct intel_context *ce = 3175 container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work); 3176 struct intel_guc *guc = ce_to_guc(ce); 3177 unsigned long flags; 3178 3179 spin_lock_irqsave(&ce->guc_state.lock, flags); 3180 3181 if (bypass_sched_disable(guc, ce)) { 3182 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3183 intel_context_sched_disable_unpin(ce); 3184 } else { 3185 do_sched_disable(guc, ce, flags); 3186 } 3187 } 3188 3189 static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce) 3190 { 3191 /* 3192 * parent contexts are perma-pinned, if we are unpinning do schedule 3193 * disable immediately. 3194 */ 3195 if (intel_context_is_parent(ce)) 3196 return true; 3197 3198 /* 3199 * If we are beyond the threshold for avail guc_ids, do schedule disable immediately. 
3200 */ 3201 return guc->submission_state.guc_ids_in_use > 3202 guc->submission_state.sched_disable_gucid_threshold; 3203 } 3204 3205 static void guc_context_sched_disable(struct intel_context *ce) 3206 { 3207 struct intel_guc *guc = ce_to_guc(ce); 3208 u64 delay = guc->submission_state.sched_disable_delay_ms; 3209 unsigned long flags; 3210 3211 spin_lock_irqsave(&ce->guc_state.lock, flags); 3212 3213 if (bypass_sched_disable(guc, ce)) { 3214 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3215 intel_context_sched_disable_unpin(ce); 3216 } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) && 3217 delay) { 3218 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3219 mod_delayed_work(system_unbound_wq, 3220 &ce->guc_state.sched_disable_delay_work, 3221 msecs_to_jiffies(delay)); 3222 } else { 3223 do_sched_disable(guc, ce, flags); 3224 } 3225 } 3226 3227 static void guc_context_close(struct intel_context *ce) 3228 { 3229 unsigned long flags; 3230 3231 if (test_bit(CONTEXT_GUC_INIT, &ce->flags) && 3232 cancel_delayed_work(&ce->guc_state.sched_disable_delay_work)) 3233 __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work); 3234 3235 spin_lock_irqsave(&ce->guc_state.lock, flags); 3236 set_context_close_done(ce); 3237 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3238 } 3239 3240 static inline void guc_lrc_desc_unpin(struct intel_context *ce) 3241 { 3242 struct intel_guc *guc = ce_to_guc(ce); 3243 struct intel_gt *gt = guc_to_gt(guc); 3244 unsigned long flags; 3245 bool disabled; 3246 3247 GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); 3248 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); 3249 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); 3250 GEM_BUG_ON(context_enabled(ce)); 3251 3252 /* Seal race with Reset */ 3253 spin_lock_irqsave(&ce->guc_state.lock, flags); 3254 disabled = submission_disabled(guc); 3255 if (likely(!disabled)) { 3256 __intel_gt_pm_get(gt); 3257 set_context_destroyed(ce); 3258 clr_context_registered(ce); 3259 } 3260 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3261 if (unlikely(disabled)) { 3262 release_guc_id(guc, ce); 3263 __guc_context_destroy(ce); 3264 return; 3265 } 3266 3267 deregister_context(ce, ce->guc_id.id); 3268 } 3269 3270 static void __guc_context_destroy(struct intel_context *ce) 3271 { 3272 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || 3273 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || 3274 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || 3275 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); 3276 3277 lrc_fini(ce); 3278 intel_context_fini(ce); 3279 3280 if (intel_engine_is_virtual(ce->engine)) { 3281 struct guc_virtual_engine *ve = 3282 container_of(ce, typeof(*ve), context); 3283 3284 if (ve->base.breadcrumbs) 3285 intel_breadcrumbs_put(ve->base.breadcrumbs); 3286 3287 kfree(ve); 3288 } else { 3289 intel_context_free(ce); 3290 } 3291 } 3292 3293 static void guc_flush_destroyed_contexts(struct intel_guc *guc) 3294 { 3295 struct intel_context *ce; 3296 unsigned long flags; 3297 3298 GEM_BUG_ON(!submission_disabled(guc) && 3299 guc_submission_initialized(guc)); 3300 3301 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3302 spin_lock_irqsave(&guc->submission_state.lock, flags); 3303 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3304 struct intel_context, 3305 destroyed_link); 3306 if (ce) 3307 list_del_init(&ce->destroyed_link); 3308 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3309 3310 if (!ce) 3311 break; 
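		/*
		 * Submission is disabled here (GuC reset or not yet
		 * initialised), so no deregistration H2G/G2H handshake is
		 * possible: release the guc_id and free the context directly,
		 * unlike deregister_destroyed_contexts().
		 */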
3312 3313 release_guc_id(guc, ce); 3314 __guc_context_destroy(ce); 3315 } 3316 } 3317 3318 static void deregister_destroyed_contexts(struct intel_guc *guc) 3319 { 3320 struct intel_context *ce; 3321 unsigned long flags; 3322 3323 while (!list_empty(&guc->submission_state.destroyed_contexts)) { 3324 spin_lock_irqsave(&guc->submission_state.lock, flags); 3325 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts, 3326 struct intel_context, 3327 destroyed_link); 3328 if (ce) 3329 list_del_init(&ce->destroyed_link); 3330 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3331 3332 if (!ce) 3333 break; 3334 3335 guc_lrc_desc_unpin(ce); 3336 } 3337 } 3338 3339 static void destroyed_worker_func(struct work_struct *w) 3340 { 3341 struct intel_guc *guc = container_of(w, struct intel_guc, 3342 submission_state.destroyed_worker); 3343 struct intel_gt *gt = guc_to_gt(guc); 3344 int tmp; 3345 3346 with_intel_gt_pm(gt, tmp) 3347 deregister_destroyed_contexts(guc); 3348 } 3349 3350 static void guc_context_destroy(struct kref *kref) 3351 { 3352 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3353 struct intel_guc *guc = ce_to_guc(ce); 3354 unsigned long flags; 3355 bool destroy; 3356 3357 /* 3358 * If the guc_id is invalid this context has been stolen and we can free 3359 * it immediately. Also can be freed immediately if the context is not 3360 * registered with the GuC or the GuC is in the middle of a reset. 3361 */ 3362 spin_lock_irqsave(&guc->submission_state.lock, flags); 3363 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) || 3364 !ctx_id_mapped(guc, ce->guc_id.id); 3365 if (likely(!destroy)) { 3366 if (!list_empty(&ce->guc_id.link)) 3367 list_del_init(&ce->guc_id.link); 3368 list_add_tail(&ce->destroyed_link, 3369 &guc->submission_state.destroyed_contexts); 3370 } else { 3371 __release_guc_id(guc, ce); 3372 } 3373 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 3374 if (unlikely(destroy)) { 3375 __guc_context_destroy(ce); 3376 return; 3377 } 3378 3379 /* 3380 * We use a worker to issue the H2G to deregister the context as we can 3381 * take the GT PM for the first time which isn't allowed from an atomic 3382 * context. 
3383 */ 3384 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker); 3385 } 3386 3387 static int guc_context_alloc(struct intel_context *ce) 3388 { 3389 return lrc_alloc(ce, ce->engine); 3390 } 3391 3392 static void __guc_context_set_prio(struct intel_guc *guc, 3393 struct intel_context *ce) 3394 { 3395 if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 0, 0)) { 3396 struct context_policy policy; 3397 3398 __guc_context_policy_start_klv(&policy, ce->guc_id.id); 3399 __guc_context_policy_add_priority(&policy, ce->guc_state.prio); 3400 __guc_context_set_context_policies(guc, &policy, true); 3401 } else { 3402 u32 action[] = { 3403 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, 3404 ce->guc_id.id, 3405 ce->guc_state.prio, 3406 }; 3407 3408 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); 3409 } 3410 } 3411 3412 static void guc_context_set_prio(struct intel_guc *guc, 3413 struct intel_context *ce, 3414 u8 prio) 3415 { 3416 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || 3417 prio > GUC_CLIENT_PRIORITY_NORMAL); 3418 lockdep_assert_held(&ce->guc_state.lock); 3419 3420 if (ce->guc_state.prio == prio || submission_disabled(guc) || 3421 !context_registered(ce)) { 3422 ce->guc_state.prio = prio; 3423 return; 3424 } 3425 3426 ce->guc_state.prio = prio; 3427 __guc_context_set_prio(guc, ce); 3428 3429 trace_intel_context_set_prio(ce); 3430 } 3431 3432 static inline u8 map_i915_prio_to_guc_prio(int prio) 3433 { 3434 if (prio == I915_PRIORITY_NORMAL) 3435 return GUC_CLIENT_PRIORITY_KMD_NORMAL; 3436 else if (prio < I915_PRIORITY_NORMAL) 3437 return GUC_CLIENT_PRIORITY_NORMAL; 3438 else if (prio < I915_PRIORITY_DISPLAY) 3439 return GUC_CLIENT_PRIORITY_HIGH; 3440 else 3441 return GUC_CLIENT_PRIORITY_KMD_HIGH; 3442 } 3443 3444 static inline void add_context_inflight_prio(struct intel_context *ce, 3445 u8 guc_prio) 3446 { 3447 lockdep_assert_held(&ce->guc_state.lock); 3448 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3449 3450 ++ce->guc_state.prio_count[guc_prio]; 3451 3452 /* Overflow protection */ 3453 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3454 } 3455 3456 static inline void sub_context_inflight_prio(struct intel_context *ce, 3457 u8 guc_prio) 3458 { 3459 lockdep_assert_held(&ce->guc_state.lock); 3460 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); 3461 3462 /* Underflow protection */ 3463 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); 3464 3465 --ce->guc_state.prio_count[guc_prio]; 3466 } 3467 3468 static inline void update_context_prio(struct intel_context *ce) 3469 { 3470 struct intel_guc *guc = &ce->engine->gt->uc.guc; 3471 int i; 3472 3473 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); 3474 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); 3475 3476 lockdep_assert_held(&ce->guc_state.lock); 3477 3478 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { 3479 if (ce->guc_state.prio_count[i]) { 3480 guc_context_set_prio(guc, ce, i); 3481 break; 3482 } 3483 } 3484 } 3485 3486 static inline bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio) 3487 { 3488 /* Lower value is higher priority */ 3489 return new_guc_prio < old_guc_prio; 3490 } 3491 3492 static void add_to_context(struct i915_request *rq) 3493 { 3494 struct intel_context *ce = request_to_scheduling_context(rq); 3495 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq)); 3496 3497 GEM_BUG_ON(intel_context_is_child(ce)); 3498 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); 3499 3500 spin_lock(&ce->guc_state.lock); 3501 
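	/*
	 * Track the request on the context and fold its priority into the
	 * per-context inflight counts; update_context_prio() then applies the
	 * highest (numerically lowest) pending GuC priority to the context.
	 */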
list_move_tail(&rq->sched.link, &ce->guc_state.requests); 3502 3503 if (rq->guc_prio == GUC_PRIO_INIT) { 3504 rq->guc_prio = new_guc_prio; 3505 add_context_inflight_prio(ce, rq->guc_prio); 3506 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) { 3507 sub_context_inflight_prio(ce, rq->guc_prio); 3508 rq->guc_prio = new_guc_prio; 3509 add_context_inflight_prio(ce, rq->guc_prio); 3510 } 3511 update_context_prio(ce); 3512 3513 spin_unlock(&ce->guc_state.lock); 3514 } 3515 3516 static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) 3517 { 3518 lockdep_assert_held(&ce->guc_state.lock); 3519 3520 if (rq->guc_prio != GUC_PRIO_INIT && 3521 rq->guc_prio != GUC_PRIO_FINI) { 3522 sub_context_inflight_prio(ce, rq->guc_prio); 3523 update_context_prio(ce); 3524 } 3525 rq->guc_prio = GUC_PRIO_FINI; 3526 } 3527 3528 static void remove_from_context(struct i915_request *rq) 3529 { 3530 struct intel_context *ce = request_to_scheduling_context(rq); 3531 3532 GEM_BUG_ON(intel_context_is_child(ce)); 3533 3534 spin_lock_irq(&ce->guc_state.lock); 3535 3536 list_del_init(&rq->sched.link); 3537 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); 3538 3539 /* Prevent further __await_execution() registering a cb, then flush */ 3540 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); 3541 3542 guc_prio_fini(rq, ce); 3543 3544 spin_unlock_irq(&ce->guc_state.lock); 3545 3546 atomic_dec(&ce->guc_id.ref); 3547 i915_request_notify_execute_cb_imm(rq); 3548 } 3549 3550 static const struct intel_context_ops guc_context_ops = { 3551 .flags = COPS_RUNTIME_CYCLES, 3552 .alloc = guc_context_alloc, 3553 3554 .close = guc_context_close, 3555 3556 .pre_pin = guc_context_pre_pin, 3557 .pin = guc_context_pin, 3558 .unpin = guc_context_unpin, 3559 .post_unpin = guc_context_post_unpin, 3560 3561 .revoke = guc_context_revoke, 3562 3563 .cancel_request = guc_context_cancel_request, 3564 3565 .enter = intel_context_enter_engine, 3566 .exit = intel_context_exit_engine, 3567 3568 .sched_disable = guc_context_sched_disable, 3569 3570 .update_stats = guc_context_update_stats, 3571 3572 .reset = lrc_reset, 3573 .destroy = guc_context_destroy, 3574 3575 .create_virtual = guc_create_virtual, 3576 .create_parallel = guc_create_parallel, 3577 }; 3578 3579 static void submit_work_cb(struct irq_work *wrk) 3580 { 3581 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); 3582 3583 might_lock(&rq->engine->sched_engine->lock); 3584 i915_sw_fence_complete(&rq->submit); 3585 } 3586 3587 static void __guc_signal_context_fence(struct intel_context *ce) 3588 { 3589 struct i915_request *rq, *rn; 3590 3591 lockdep_assert_held(&ce->guc_state.lock); 3592 3593 if (!list_empty(&ce->guc_state.fences)) 3594 trace_intel_context_fence_release(ce); 3595 3596 /* 3597 * Use an IRQ to ensure locking order of sched_engine->lock -> 3598 * ce->guc_state.lock is preserved. 
3599 */ 3600 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, 3601 guc_fence_link) { 3602 list_del(&rq->guc_fence_link); 3603 irq_work_queue(&rq->submit_work); 3604 } 3605 3606 INIT_LIST_HEAD(&ce->guc_state.fences); 3607 } 3608 3609 static void guc_signal_context_fence(struct intel_context *ce) 3610 { 3611 unsigned long flags; 3612 3613 GEM_BUG_ON(intel_context_is_child(ce)); 3614 3615 spin_lock_irqsave(&ce->guc_state.lock, flags); 3616 clr_context_wait_for_deregister_to_register(ce); 3617 __guc_signal_context_fence(ce); 3618 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3619 } 3620 3621 static bool context_needs_register(struct intel_context *ce, bool new_guc_id) 3622 { 3623 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || 3624 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) && 3625 !submission_disabled(ce_to_guc(ce)); 3626 } 3627 3628 static void guc_context_init(struct intel_context *ce) 3629 { 3630 const struct i915_gem_context *ctx; 3631 int prio = I915_CONTEXT_DEFAULT_PRIORITY; 3632 3633 rcu_read_lock(); 3634 ctx = rcu_dereference(ce->gem_context); 3635 if (ctx) 3636 prio = ctx->sched.priority; 3637 rcu_read_unlock(); 3638 3639 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); 3640 3641 INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work, 3642 __delay_sched_disable); 3643 3644 set_bit(CONTEXT_GUC_INIT, &ce->flags); 3645 } 3646 3647 static int guc_request_alloc(struct i915_request *rq) 3648 { 3649 struct intel_context *ce = request_to_scheduling_context(rq); 3650 struct intel_guc *guc = ce_to_guc(ce); 3651 unsigned long flags; 3652 int ret; 3653 3654 GEM_BUG_ON(!intel_context_is_pinned(rq->context)); 3655 3656 /* 3657 * Flush enough space to reduce the likelihood of waiting after 3658 * we start building the request - in which case we will just 3659 * have to repeat work. 3660 */ 3661 rq->reserved_space += GUC_REQUEST_SIZE; 3662 3663 /* 3664 * Note that after this point, we have committed to using 3665 * this request as it is being used to both track the 3666 * state of engine initialisation and liveness of the 3667 * golden renderstate above. Think twice before you try 3668 * to cancel/unwind this request now. 3669 */ 3670 3671 /* Unconditionally invalidate GPU caches and TLBs. */ 3672 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 3673 if (ret) 3674 return ret; 3675 3676 rq->reserved_space -= GUC_REQUEST_SIZE; 3677 3678 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) 3679 guc_context_init(ce); 3680 3681 /* 3682 * If the context gets closed while the execbuf is ongoing, the context 3683 * close code will race with the below code to cancel the delayed work. 3684 * If the context close wins the race and cancels the work, it will 3685 * immediately call the sched disable (see guc_context_close), so there 3686 * is a chance we can get past this check while the sched_disable code 3687 * is being executed. To make sure that code completes before we check 3688 * the status further down, we wait for the close process to complete. 3689 * Else, this code path could send a request down thinking that the 3690 * context is still in a schedule-enable mode while the GuC ends up 3691 * dropping the request completely because the disable did go from the 3692 * context_close path right to GuC just prior. In the event the CT is 3693 * full, we could potentially need to wait up to 1.5 seconds. 
3694 */ 3695 if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work)) 3696 intel_context_sched_disable_unpin(ce); 3697 else if (intel_context_is_closed(ce)) 3698 if (wait_for(context_close_done(ce), 1500)) 3699 guc_warn(guc, "timed out waiting on context sched close before realloc\n"); 3700 /* 3701 * Call pin_guc_id here rather than in the pinning step as with 3702 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the 3703 * guc_id and creating horrible race conditions. This is especially bad 3704 * when guc_id are being stolen due to over subscription. By the time 3705 * this function is reached, it is guaranteed that the guc_id will be 3706 * persistent until the generated request is retired. Thus, sealing these 3707 * race conditions. It is still safe to fail here if guc_id are 3708 * exhausted and return -EAGAIN to the user indicating that they can try 3709 * again in the future. 3710 * 3711 * There is no need for a lock here as the timeline mutex ensures at 3712 * most one context can be executing this code path at once. The 3713 * guc_id_ref is incremented once for every request in flight and 3714 * decremented on each retire. When it is zero, a lock around the 3715 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. 3716 */ 3717 if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) 3718 goto out; 3719 3720 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ 3721 if (unlikely(ret < 0)) 3722 return ret; 3723 if (context_needs_register(ce, !!ret)) { 3724 ret = try_context_registration(ce, true); 3725 if (unlikely(ret)) { /* unwind */ 3726 if (ret == -EPIPE) { 3727 disable_submission(guc); 3728 goto out; /* GPU will be reset */ 3729 } 3730 atomic_dec(&ce->guc_id.ref); 3731 unpin_guc_id(guc, ce); 3732 return ret; 3733 } 3734 } 3735 3736 clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags); 3737 3738 out: 3739 /* 3740 * We block all requests on this context if a G2H is pending for a 3741 * schedule disable or context deregistration as the GuC will fail a 3742 * schedule enable or context registration if either G2H is pending 3743 * respectfully. Once a G2H returns, the fence is released that is 3744 * blocking these requests (see guc_signal_context_fence). 
3745 */ 3746 spin_lock_irqsave(&ce->guc_state.lock, flags); 3747 if (context_wait_for_deregister_to_register(ce) || 3748 context_pending_disable(ce)) { 3749 init_irq_work(&rq->submit_work, submit_work_cb); 3750 i915_sw_fence_await(&rq->submit); 3751 3752 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); 3753 } 3754 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 3755 3756 return 0; 3757 } 3758 3759 static int guc_virtual_context_pre_pin(struct intel_context *ce, 3760 struct i915_gem_ww_ctx *ww, 3761 void **vaddr) 3762 { 3763 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3764 3765 return __guc_context_pre_pin(ce, engine, ww, vaddr); 3766 } 3767 3768 static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr) 3769 { 3770 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3771 int ret = __guc_context_pin(ce, engine, vaddr); 3772 intel_engine_mask_t tmp, mask = ce->engine->mask; 3773 3774 if (likely(!ret)) 3775 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3776 intel_engine_pm_get(engine); 3777 3778 return ret; 3779 } 3780 3781 static void guc_virtual_context_unpin(struct intel_context *ce) 3782 { 3783 intel_engine_mask_t tmp, mask = ce->engine->mask; 3784 struct intel_engine_cs *engine; 3785 struct intel_guc *guc = ce_to_guc(ce); 3786 3787 GEM_BUG_ON(context_enabled(ce)); 3788 GEM_BUG_ON(intel_context_is_barrier(ce)); 3789 3790 unpin_guc_id(guc, ce); 3791 lrc_unpin(ce); 3792 3793 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3794 intel_engine_pm_put_async(engine); 3795 } 3796 3797 static void guc_virtual_context_enter(struct intel_context *ce) 3798 { 3799 intel_engine_mask_t tmp, mask = ce->engine->mask; 3800 struct intel_engine_cs *engine; 3801 3802 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3803 intel_engine_pm_get(engine); 3804 3805 intel_timeline_enter(ce->timeline); 3806 } 3807 3808 static void guc_virtual_context_exit(struct intel_context *ce) 3809 { 3810 intel_engine_mask_t tmp, mask = ce->engine->mask; 3811 struct intel_engine_cs *engine; 3812 3813 for_each_engine_masked(engine, ce->engine->gt, mask, tmp) 3814 intel_engine_pm_put(engine); 3815 3816 intel_timeline_exit(ce->timeline); 3817 } 3818 3819 static int guc_virtual_context_alloc(struct intel_context *ce) 3820 { 3821 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3822 3823 return lrc_alloc(ce, engine); 3824 } 3825 3826 static const struct intel_context_ops virtual_guc_context_ops = { 3827 .flags = COPS_RUNTIME_CYCLES, 3828 .alloc = guc_virtual_context_alloc, 3829 3830 .close = guc_context_close, 3831 3832 .pre_pin = guc_virtual_context_pre_pin, 3833 .pin = guc_virtual_context_pin, 3834 .unpin = guc_virtual_context_unpin, 3835 .post_unpin = guc_context_post_unpin, 3836 3837 .revoke = guc_context_revoke, 3838 3839 .cancel_request = guc_context_cancel_request, 3840 3841 .enter = guc_virtual_context_enter, 3842 .exit = guc_virtual_context_exit, 3843 3844 .sched_disable = guc_context_sched_disable, 3845 .update_stats = guc_context_update_stats, 3846 3847 .destroy = guc_context_destroy, 3848 3849 .get_sibling = guc_virtual_get_sibling, 3850 }; 3851 3852 static int guc_parent_context_pin(struct intel_context *ce, void *vaddr) 3853 { 3854 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3855 struct intel_guc *guc = ce_to_guc(ce); 3856 int ret; 3857 3858 GEM_BUG_ON(!intel_context_is_parent(ce)); 3859 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3860 3861 ret = 
pin_guc_id(guc, ce); 3862 if (unlikely(ret < 0)) 3863 return ret; 3864 3865 return __guc_context_pin(ce, engine, vaddr); 3866 } 3867 3868 static int guc_child_context_pin(struct intel_context *ce, void *vaddr) 3869 { 3870 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0); 3871 3872 GEM_BUG_ON(!intel_context_is_child(ce)); 3873 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3874 3875 __intel_context_pin(ce->parallel.parent); 3876 return __guc_context_pin(ce, engine, vaddr); 3877 } 3878 3879 static void guc_parent_context_unpin(struct intel_context *ce) 3880 { 3881 struct intel_guc *guc = ce_to_guc(ce); 3882 3883 GEM_BUG_ON(context_enabled(ce)); 3884 GEM_BUG_ON(intel_context_is_barrier(ce)); 3885 GEM_BUG_ON(!intel_context_is_parent(ce)); 3886 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3887 3888 unpin_guc_id(guc, ce); 3889 lrc_unpin(ce); 3890 } 3891 3892 static void guc_child_context_unpin(struct intel_context *ce) 3893 { 3894 GEM_BUG_ON(context_enabled(ce)); 3895 GEM_BUG_ON(intel_context_is_barrier(ce)); 3896 GEM_BUG_ON(!intel_context_is_child(ce)); 3897 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3898 3899 lrc_unpin(ce); 3900 } 3901 3902 static void guc_child_context_post_unpin(struct intel_context *ce) 3903 { 3904 GEM_BUG_ON(!intel_context_is_child(ce)); 3905 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent)); 3906 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine)); 3907 3908 lrc_post_unpin(ce); 3909 intel_context_unpin(ce->parallel.parent); 3910 } 3911 3912 static void guc_child_context_destroy(struct kref *kref) 3913 { 3914 struct intel_context *ce = container_of(kref, typeof(*ce), ref); 3915 3916 __guc_context_destroy(ce); 3917 } 3918 3919 static const struct intel_context_ops virtual_parent_context_ops = { 3920 .alloc = guc_virtual_context_alloc, 3921 3922 .close = guc_context_close, 3923 3924 .pre_pin = guc_context_pre_pin, 3925 .pin = guc_parent_context_pin, 3926 .unpin = guc_parent_context_unpin, 3927 .post_unpin = guc_context_post_unpin, 3928 3929 .revoke = guc_context_revoke, 3930 3931 .cancel_request = guc_context_cancel_request, 3932 3933 .enter = guc_virtual_context_enter, 3934 .exit = guc_virtual_context_exit, 3935 3936 .sched_disable = guc_context_sched_disable, 3937 3938 .destroy = guc_context_destroy, 3939 3940 .get_sibling = guc_virtual_get_sibling, 3941 }; 3942 3943 static const struct intel_context_ops virtual_child_context_ops = { 3944 .alloc = guc_virtual_context_alloc, 3945 3946 .pre_pin = guc_context_pre_pin, 3947 .pin = guc_child_context_pin, 3948 .unpin = guc_child_context_unpin, 3949 .post_unpin = guc_child_context_post_unpin, 3950 3951 .cancel_request = guc_context_cancel_request, 3952 3953 .enter = guc_virtual_context_enter, 3954 .exit = guc_virtual_context_exit, 3955 3956 .destroy = guc_child_context_destroy, 3957 3958 .get_sibling = guc_virtual_get_sibling, 3959 }; 3960 3961 /* 3962 * The below override of the breadcrumbs is enabled when the user configures a 3963 * context for parallel submission (multi-lrc, parent-child). 3964 * 3965 * The overridden breadcrumbs implements an algorithm which allows the GuC to 3966 * safely preempt all the hw contexts configured for parallel submission 3967 * between each BB. The contract between the i915 and GuC is if the parent 3968 * context can be preempted, all the children can be preempted, and the GuC will 3969 * always try to preempt the parent before the children. 
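*
* (As a rough sketch of the emitters declared below: before each BB every
* child signals its per-child join semaphore and polls the parent's go
* semaphore, the parent waits for all joins and then writes go, and each
* member disables arbitration (MI_ARB_DISABLE) before jumping to its
* batch; the fini breadcrumbs re-enable arbitration (MI_ARB_ENABLE) and
* run a second join/go handshake.)
*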
A handshake between the 3970 * parent / children breadcrumbs ensures the i915 holds up its end of the deal 3971 * creating a window to preempt between each set of BBs. 3972 */ 3973 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 3974 u64 offset, u32 len, 3975 const unsigned int flags); 3976 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 3977 u64 offset, u32 len, 3978 const unsigned int flags); 3979 static u32 * 3980 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 3981 u32 *cs); 3982 static u32 * 3983 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 3984 u32 *cs); 3985 3986 static struct intel_context * 3987 guc_create_parallel(struct intel_engine_cs **engines, 3988 unsigned int num_siblings, 3989 unsigned int width) 3990 { 3991 struct intel_engine_cs **siblings = NULL; 3992 struct intel_context *parent = NULL, *ce, *err; 3993 int i, j; 3994 3995 siblings = kmalloc_array(num_siblings, 3996 sizeof(*siblings), 3997 GFP_KERNEL); 3998 if (!siblings) 3999 return ERR_PTR(-ENOMEM); 4000 4001 for (i = 0; i < width; ++i) { 4002 for (j = 0; j < num_siblings; ++j) 4003 siblings[j] = engines[i * num_siblings + j]; 4004 4005 ce = intel_engine_create_virtual(siblings, num_siblings, 4006 FORCE_VIRTUAL); 4007 if (IS_ERR(ce)) { 4008 err = ERR_CAST(ce); 4009 goto unwind; 4010 } 4011 4012 if (i == 0) { 4013 parent = ce; 4014 parent->ops = &virtual_parent_context_ops; 4015 } else { 4016 ce->ops = &virtual_child_context_ops; 4017 intel_context_bind_parent_child(parent, ce); 4018 } 4019 } 4020 4021 parent->parallel.fence_context = dma_fence_context_alloc(1); 4022 4023 parent->engine->emit_bb_start = 4024 emit_bb_start_parent_no_preempt_mid_batch; 4025 parent->engine->emit_fini_breadcrumb = 4026 emit_fini_breadcrumb_parent_no_preempt_mid_batch; 4027 parent->engine->emit_fini_breadcrumb_dw = 4028 12 + 4 * parent->parallel.number_children; 4029 for_each_child(parent, ce) { 4030 ce->engine->emit_bb_start = 4031 emit_bb_start_child_no_preempt_mid_batch; 4032 ce->engine->emit_fini_breadcrumb = 4033 emit_fini_breadcrumb_child_no_preempt_mid_batch; 4034 ce->engine->emit_fini_breadcrumb_dw = 16; 4035 } 4036 4037 kfree(siblings); 4038 return parent; 4039 4040 unwind: 4041 if (parent) 4042 intel_context_put(parent); 4043 kfree(siblings); 4044 return err; 4045 } 4046 4047 static bool 4048 guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b) 4049 { 4050 struct intel_engine_cs *sibling; 4051 intel_engine_mask_t tmp, mask = b->engine_mask; 4052 bool result = false; 4053 4054 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4055 result |= intel_engine_irq_enable(sibling); 4056 4057 return result; 4058 } 4059 4060 static void 4061 guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b) 4062 { 4063 struct intel_engine_cs *sibling; 4064 intel_engine_mask_t tmp, mask = b->engine_mask; 4065 4066 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp) 4067 intel_engine_irq_disable(sibling); 4068 } 4069 4070 static void guc_init_breadcrumbs(struct intel_engine_cs *engine) 4071 { 4072 int i; 4073 4074 /* 4075 * In GuC submission mode we do not know which physical engine a request 4076 * will be scheduled on, this creates a problem because the breadcrumb 4077 * interrupt is per physical engine. To work around this we attach 4078 * requests and direct all breadcrumb interrupts to the first instance 4079 * of an engine per class. 
In addition all breadcrumb interrupts are 4080 * enabled / disabled across an engine class in unison. 4081 */ 4082 for (i = 0; i < MAX_ENGINE_INSTANCE; ++i) { 4083 struct intel_engine_cs *sibling = 4084 engine->gt->engine_class[engine->class][i]; 4085 4086 if (sibling) { 4087 if (engine->breadcrumbs != sibling->breadcrumbs) { 4088 intel_breadcrumbs_put(engine->breadcrumbs); 4089 engine->breadcrumbs = 4090 intel_breadcrumbs_get(sibling->breadcrumbs); 4091 } 4092 break; 4093 } 4094 } 4095 4096 if (engine->breadcrumbs) { 4097 engine->breadcrumbs->engine_mask |= engine->mask; 4098 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs; 4099 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs; 4100 } 4101 } 4102 4103 static void guc_bump_inflight_request_prio(struct i915_request *rq, 4104 int prio) 4105 { 4106 struct intel_context *ce = request_to_scheduling_context(rq); 4107 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio); 4108 4109 /* Short circuit function */ 4110 if (prio < I915_PRIORITY_NORMAL || 4111 rq->guc_prio == GUC_PRIO_FINI || 4112 (rq->guc_prio != GUC_PRIO_INIT && 4113 !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) 4114 return; 4115 4116 spin_lock(&ce->guc_state.lock); 4117 if (rq->guc_prio != GUC_PRIO_FINI) { 4118 if (rq->guc_prio != GUC_PRIO_INIT) 4119 sub_context_inflight_prio(ce, rq->guc_prio); 4120 rq->guc_prio = new_guc_prio; 4121 add_context_inflight_prio(ce, rq->guc_prio); 4122 update_context_prio(ce); 4123 } 4124 spin_unlock(&ce->guc_state.lock); 4125 } 4126 4127 static void guc_retire_inflight_request_prio(struct i915_request *rq) 4128 { 4129 struct intel_context *ce = request_to_scheduling_context(rq); 4130 4131 spin_lock(&ce->guc_state.lock); 4132 guc_prio_fini(rq, ce); 4133 spin_unlock(&ce->guc_state.lock); 4134 } 4135 4136 static void sanitize_hwsp(struct intel_engine_cs *engine) 4137 { 4138 struct intel_timeline *tl; 4139 4140 list_for_each_entry(tl, &engine->status_page.timelines, engine_link) 4141 intel_timeline_reset_seqno(tl); 4142 } 4143 4144 static void guc_sanitize(struct intel_engine_cs *engine) 4145 { 4146 /* 4147 * Poison residual state on resume, in case the suspend didn't! 4148 * 4149 * We have to assume that across suspend/resume (or other loss 4150 * of control) that the contents of our pinned buffers has been 4151 * lost, replaced by garbage. Since this doesn't always happen, 4152 * let's poison such state so that we more quickly spot when 4153 * we falsely assume it has been preserved. 4154 */ 4155 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 4156 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); 4157 4158 /* 4159 * The kernel_context HWSP is stored in the status_page. As above, 4160 * that may be lost on resume/initialisation, and so we need to 4161 * reset the value in the HWSP. 
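*
* (sanitize_hwsp() above just walks engine->status_page.timelines and
* calls intel_timeline_reset_seqno() on each, so seqno reads from the
* HWSP see a sane value again instead of whatever garbage, or the
* POISON_INUSE pattern written above, survived suspend.)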
4162 */ 4163 sanitize_hwsp(engine); 4164 4165 /* And scrub the dirty cachelines for the HWSP */ 4166 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE); 4167 4168 intel_engine_reset_pinned_contexts(engine); 4169 } 4170 4171 static void setup_hwsp(struct intel_engine_cs *engine) 4172 { 4173 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ 4174 4175 ENGINE_WRITE_FW(engine, 4176 RING_HWS_PGA, 4177 i915_ggtt_offset(engine->status_page.vma)); 4178 } 4179 4180 static void start_engine(struct intel_engine_cs *engine) 4181 { 4182 ENGINE_WRITE_FW(engine, 4183 RING_MODE_GEN7, 4184 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); 4185 4186 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 4187 ENGINE_POSTING_READ(engine, RING_MI_MODE); 4188 } 4189 4190 static int guc_resume(struct intel_engine_cs *engine) 4191 { 4192 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); 4193 4194 intel_mocs_init_engine(engine); 4195 4196 intel_breadcrumbs_reset(engine->breadcrumbs); 4197 4198 setup_hwsp(engine); 4199 start_engine(engine); 4200 4201 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) 4202 xehp_enable_ccs_engines(engine); 4203 4204 return 0; 4205 } 4206 4207 static bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine) 4208 { 4209 return !sched_engine->tasklet.callback; 4210 } 4211 4212 static void guc_set_default_submission(struct intel_engine_cs *engine) 4213 { 4214 engine->submit_request = guc_submit_request; 4215 } 4216 4217 static inline int guc_kernel_context_pin(struct intel_guc *guc, 4218 struct intel_context *ce) 4219 { 4220 int ret; 4221 4222 /* 4223 * Note: we purposefully do not check the returns below because 4224 * the registration can only fail if a reset is just starting. 4225 * This is called at the end of reset so presumably another reset 4226 * isn't happening and even it did this code would be run again. 4227 */ 4228 4229 if (context_guc_id_invalid(ce)) { 4230 ret = pin_guc_id(guc, ce); 4231 4232 if (ret < 0) 4233 return ret; 4234 } 4235 4236 if (!test_bit(CONTEXT_GUC_INIT, &ce->flags)) 4237 guc_context_init(ce); 4238 4239 ret = try_context_registration(ce, true); 4240 if (ret) 4241 unpin_guc_id(guc, ce); 4242 4243 return ret; 4244 } 4245 4246 static inline int guc_init_submission(struct intel_guc *guc) 4247 { 4248 struct intel_gt *gt = guc_to_gt(guc); 4249 struct intel_engine_cs *engine; 4250 enum intel_engine_id id; 4251 4252 /* make sure all descriptors are clean... */ 4253 xa_destroy(&guc->context_lookup); 4254 4255 /* 4256 * A reset might have occurred while we had a pending stalled request, 4257 * so make sure we clean that up. 4258 */ 4259 guc->stalled_request = NULL; 4260 guc->submission_stall_reason = STALL_NONE; 4261 4262 /* 4263 * Some contexts might have been pinned before we enabled GuC 4264 * submission, so we need to add them to the GuC bookeeping. 4265 * Also, after a reset the of the GuC we want to make sure that the 4266 * information shared with GuC is properly reset. The kernel LRCs are 4267 * not attached to the gem_context, so they need to be added separately. 
4268 */ 4269 for_each_engine(engine, gt, id) { 4270 struct intel_context *ce; 4271 4272 list_for_each_entry(ce, &engine->pinned_contexts_list, 4273 pinned_contexts_link) { 4274 int ret = guc_kernel_context_pin(guc, ce); 4275 4276 if (ret) { 4277 /* No point in trying to clean up as i915 will wedge on failure */ 4278 return ret; 4279 } 4280 } 4281 } 4282 4283 return 0; 4284 } 4285 4286 static void guc_release(struct intel_engine_cs *engine) 4287 { 4288 engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ 4289 4290 intel_engine_cleanup_common(engine); 4291 lrc_fini_wa_ctx(engine); 4292 } 4293 4294 static void virtual_guc_bump_serial(struct intel_engine_cs *engine) 4295 { 4296 struct intel_engine_cs *e; 4297 intel_engine_mask_t tmp, mask = engine->mask; 4298 4299 for_each_engine_masked(e, engine->gt, mask, tmp) 4300 e->serial++; 4301 } 4302 4303 static void guc_default_vfuncs(struct intel_engine_cs *engine) 4304 { 4305 /* Default vfuncs which can be overridden by each engine. */ 4306 4307 engine->resume = guc_resume; 4308 4309 engine->cops = &guc_context_ops; 4310 engine->request_alloc = guc_request_alloc; 4311 engine->add_active_request = add_to_context; 4312 engine->remove_active_request = remove_from_context; 4313 4314 engine->sched_engine->schedule = i915_schedule; 4315 4316 engine->reset.prepare = guc_engine_reset_prepare; 4317 engine->reset.rewind = guc_rewind_nop; 4318 engine->reset.cancel = guc_reset_nop; 4319 engine->reset.finish = guc_reset_nop; 4320 4321 engine->emit_flush = gen8_emit_flush_xcs; 4322 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; 4323 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; 4324 if (GRAPHICS_VER(engine->i915) >= 12) { 4325 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; 4326 engine->emit_flush = gen12_emit_flush_xcs; 4327 } 4328 engine->set_default_submission = guc_set_default_submission; 4329 engine->busyness = guc_engine_busyness; 4330 4331 engine->flags |= I915_ENGINE_SUPPORTS_STATS; 4332 engine->flags |= I915_ENGINE_HAS_PREEMPTION; 4333 engine->flags |= I915_ENGINE_HAS_TIMESLICES; 4334 4335 /* Wa_14014475959:dg2 */ 4336 if (engine->class == COMPUTE_CLASS) 4337 if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 4338 IS_DG2(engine->i915)) 4339 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; 4340 4341 /* 4342 * TODO: GuC supports timeslicing and semaphores as well, but they're 4343 * handled by the firmware so some minor tweaks are required before 4344 * enabling. 
4345 * 4346 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; 4347 */ 4348 4349 engine->emit_bb_start = gen8_emit_bb_start; 4350 if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) 4351 engine->emit_bb_start = xehp_emit_bb_start; 4352 } 4353 4354 static void rcs_submission_override(struct intel_engine_cs *engine) 4355 { 4356 switch (GRAPHICS_VER(engine->i915)) { 4357 case 12: 4358 engine->emit_flush = gen12_emit_flush_rcs; 4359 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; 4360 break; 4361 case 11: 4362 engine->emit_flush = gen11_emit_flush_rcs; 4363 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; 4364 break; 4365 default: 4366 engine->emit_flush = gen8_emit_flush_rcs; 4367 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; 4368 break; 4369 } 4370 } 4371 4372 static inline void guc_default_irqs(struct intel_engine_cs *engine) 4373 { 4374 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; 4375 intel_engine_set_irq_handler(engine, cs_irq_handler); 4376 } 4377 4378 static void guc_sched_engine_destroy(struct kref *kref) 4379 { 4380 struct i915_sched_engine *sched_engine = 4381 container_of(kref, typeof(*sched_engine), ref); 4382 struct intel_guc *guc = sched_engine->private_data; 4383 4384 guc->sched_engine = NULL; 4385 tasklet_kill(&sched_engine->tasklet); /* flush the callback */ 4386 kfree(sched_engine); 4387 } 4388 4389 int intel_guc_submission_setup(struct intel_engine_cs *engine) 4390 { 4391 struct drm_i915_private *i915 = engine->i915; 4392 struct intel_guc *guc = &engine->gt->uc.guc; 4393 4394 /* 4395 * The setup relies on several assumptions (e.g. irqs always enabled) 4396 * that are only valid on gen11+ 4397 */ 4398 GEM_BUG_ON(GRAPHICS_VER(i915) < 11); 4399 4400 if (!guc->sched_engine) { 4401 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL); 4402 if (!guc->sched_engine) 4403 return -ENOMEM; 4404 4405 guc->sched_engine->schedule = i915_schedule; 4406 guc->sched_engine->disabled = guc_sched_engine_disabled; 4407 guc->sched_engine->private_data = guc; 4408 guc->sched_engine->destroy = guc_sched_engine_destroy; 4409 guc->sched_engine->bump_inflight_request_prio = 4410 guc_bump_inflight_request_prio; 4411 guc->sched_engine->retire_inflight_request_prio = 4412 guc_retire_inflight_request_prio; 4413 tasklet_setup(&guc->sched_engine->tasklet, 4414 guc_submission_tasklet); 4415 } 4416 i915_sched_engine_put(engine->sched_engine); 4417 engine->sched_engine = i915_sched_engine_get(guc->sched_engine); 4418 4419 guc_default_vfuncs(engine); 4420 guc_default_irqs(engine); 4421 guc_init_breadcrumbs(engine); 4422 4423 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) 4424 rcs_submission_override(engine); 4425 4426 lrc_init_wa_ctx(engine); 4427 4428 /* Finally, take ownership and responsibility for cleanup! 
*/ 4429 engine->sanitize = guc_sanitize; 4430 engine->release = guc_release; 4431 4432 return 0; 4433 } 4434 4435 struct scheduling_policy { 4436 /* internal data */ 4437 u32 max_words, num_words; 4438 u32 count; 4439 /* API data */ 4440 struct guc_update_scheduling_policy h2g; 4441 }; 4442 4443 static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy) 4444 { 4445 u32 *start = (void *)&policy->h2g; 4446 u32 *end = policy->h2g.data + policy->num_words; 4447 size_t delta = end - start; 4448 4449 return delta; 4450 } 4451 4452 static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy) 4453 { 4454 policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV; 4455 policy->max_words = ARRAY_SIZE(policy->h2g.data); 4456 policy->num_words = 0; 4457 policy->count = 0; 4458 4459 return policy; 4460 } 4461 4462 static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy, 4463 u32 action, u32 *data, u32 len) 4464 { 4465 u32 *klv_ptr = policy->h2g.data + policy->num_words; 4466 4467 GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words); 4468 *(klv_ptr++) = FIELD_PREP(GUC_KLV_0_KEY, action) | 4469 FIELD_PREP(GUC_KLV_0_LEN, len); 4470 memcpy(klv_ptr, data, sizeof(u32) * len); 4471 policy->num_words += 1 + len; 4472 policy->count++; 4473 } 4474 4475 static int __guc_action_set_scheduling_policies(struct intel_guc *guc, 4476 struct scheduling_policy *policy) 4477 { 4478 int ret; 4479 4480 ret = intel_guc_send(guc, (u32 *)&policy->h2g, 4481 __guc_scheduling_policy_action_size(policy)); 4482 if (ret < 0) { 4483 guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n", 4484 ERR_PTR(ret)); 4485 return ret; 4486 } 4487 4488 if (ret != policy->count) { 4489 guc_warn(guc, "global scheduler policy processed %d of %d KLVs!", 4490 ret, policy->count); 4491 if (ret > policy->count) 4492 return -EPROTO; 4493 } 4494 4495 return 0; 4496 } 4497 4498 static int guc_init_global_schedule_policy(struct intel_guc *guc) 4499 { 4500 struct scheduling_policy policy; 4501 struct intel_gt *gt = guc_to_gt(guc); 4502 intel_wakeref_t wakeref; 4503 int ret; 4504 4505 if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0)) 4506 return 0; 4507 4508 __guc_scheduling_policy_start_klv(&policy); 4509 4510 with_intel_runtime_pm(>->i915->runtime_pm, wakeref) { 4511 u32 yield[] = { 4512 GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION, 4513 GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO, 4514 }; 4515 4516 __guc_scheduling_policy_add_klv(&policy, 4517 GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD, 4518 yield, ARRAY_SIZE(yield)); 4519 4520 ret = __guc_action_set_scheduling_policies(guc, &policy); 4521 } 4522 4523 return ret; 4524 } 4525 4526 static void guc_route_semaphores(struct intel_guc *guc, bool to_guc) 4527 { 4528 struct intel_gt *gt = guc_to_gt(guc); 4529 u32 val; 4530 4531 if (GRAPHICS_VER(gt->i915) < 12) 4532 return; 4533 4534 if (to_guc) 4535 val = GUC_SEM_INTR_ROUTE_TO_GUC | GUC_SEM_INTR_ENABLE_ALL; 4536 else 4537 val = 0; 4538 4539 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, val); 4540 } 4541 4542 int intel_guc_submission_enable(struct intel_guc *guc) 4543 { 4544 int ret; 4545 4546 /* Semaphore interrupt enable and route to GuC */ 4547 guc_route_semaphores(guc, true); 4548 4549 ret = guc_init_submission(guc); 4550 if (ret) 4551 goto fail_sem; 4552 4553 ret = guc_init_engine_stats(guc); 4554 if (ret) 4555 goto fail_sem; 4556 4557 ret = guc_init_global_schedule_policy(guc); 4558 if (ret) 4559 goto fail_stats; 4560 
4561 return 0; 4562 4563 fail_stats: 4564 guc_fini_engine_stats(guc); 4565 fail_sem: 4566 guc_route_semaphores(guc, false); 4567 return ret; 4568 } 4569 4570 /* Note: By the time we're here, GuC may have already been reset */ 4571 void intel_guc_submission_disable(struct intel_guc *guc) 4572 { 4573 guc_cancel_busyness_worker(guc); 4574 4575 /* Semaphore interrupt disable and route to host */ 4576 guc_route_semaphores(guc, false); 4577 } 4578 4579 static bool __guc_submission_supported(struct intel_guc *guc) 4580 { 4581 /* GuC submission is unavailable for pre-Gen11 */ 4582 return intel_guc_is_supported(guc) && 4583 GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; 4584 } 4585 4586 static bool __guc_submission_selected(struct intel_guc *guc) 4587 { 4588 struct drm_i915_private *i915 = guc_to_gt(guc)->i915; 4589 4590 if (!intel_guc_submission_is_supported(guc)) 4591 return false; 4592 4593 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION; 4594 } 4595 4596 int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc) 4597 { 4598 return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc); 4599 } 4600 4601 /* 4602 * This default value of 33 milliseconds (+1 millisecond round-up) ensures 30fps or higher 4603 * workloads are able to enjoy the latency reduction when delaying the schedule-disable 4604 * operation. This matches the 30fps game-render + encode (real world) workload this 4605 * knob was tested against. 4606 */ 4607 #define SCHED_DISABLE_DELAY_MS 34 4608 4609 /* 4610 * A threshold of 75% is a reasonable starting point considering that real world apps 4611 * generally don't get anywhere near this. 4612 */ 4613 #define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \ 4614 (((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4) 4615 4616 void intel_guc_submission_init_early(struct intel_guc *guc) 4617 { 4618 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ); 4619 4620 mtx_init(&guc->submission_state.lock, IPL_TTY); 4621 INIT_LIST_HEAD(&guc->submission_state.guc_id_list); 4622 ida_init(&guc->submission_state.guc_ids); 4623 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts); 4624 INIT_WORK(&guc->submission_state.destroyed_worker, 4625 destroyed_worker_func); 4626 INIT_WORK(&guc->submission_state.reset_fail_worker, 4627 reset_fail_worker_func); 4628 4629 mtx_init(&guc->timestamp.lock, IPL_TTY); 4630 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); 4631 4632 guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS; 4633 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID; 4634 guc->submission_state.sched_disable_gucid_threshold = 4635 NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc); 4636 guc->submission_supported = __guc_submission_supported(guc); 4637 guc->submission_selected = __guc_submission_selected(guc); 4638 } 4639 4640 static inline struct intel_context * 4641 g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) 4642 { 4643 struct intel_context *ce; 4644 4645 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) { 4646 guc_err(guc, "Invalid ctx_id %u\n", ctx_id); 4647 return NULL; 4648 } 4649 4650 ce = __get_context(guc, ctx_id); 4651 if (unlikely(!ce)) { 4652 guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id); 4653 return NULL; 4654 } 4655 4656 if (unlikely(intel_context_is_child(ce))) { 4657 guc_err(guc, "Context is child, ctx_id %u\n", ctx_id); 4658 return NULL; 4659 } 4660 4661 return ce; 4662 } 4663 4664 int intel_guc_deregister_done_process_msg(struct intel_guc *guc, 4665 const u32 *msg, 4666 u32 len) 4667 { 4668 struct
intel_context *ce; 4669 u32 ctx_id; 4670 4671 if (unlikely(len < 1)) { 4672 guc_err(guc, "Invalid length %u\n", len); 4673 return -EPROTO; 4674 } 4675 ctx_id = msg[0]; 4676 4677 ce = g2h_context_lookup(guc, ctx_id); 4678 if (unlikely(!ce)) 4679 return -EPROTO; 4680 4681 trace_intel_context_deregister_done(ce); 4682 4683 #ifdef CONFIG_DRM_I915_SELFTEST 4684 if (unlikely(ce->drop_deregister)) { 4685 ce->drop_deregister = false; 4686 return 0; 4687 } 4688 #endif 4689 4690 if (context_wait_for_deregister_to_register(ce)) { 4691 struct intel_runtime_pm *runtime_pm = 4692 &ce->engine->gt->i915->runtime_pm; 4693 intel_wakeref_t wakeref; 4694 4695 /* 4696 * Previous owner of this guc_id has been deregistered, now safe to 4697 * register this context. 4698 */ 4699 with_intel_runtime_pm(runtime_pm, wakeref) 4700 register_context(ce, true); 4701 guc_signal_context_fence(ce); 4702 intel_context_put(ce); 4703 } else if (context_destroyed(ce)) { 4704 /* Context has been destroyed */ 4705 intel_gt_pm_put_async(guc_to_gt(guc)); 4706 release_guc_id(guc, ce); 4707 __guc_context_destroy(ce); 4708 } 4709 4710 decr_outstanding_submission_g2h(guc); 4711 4712 return 0; 4713 } 4714 4715 int intel_guc_sched_done_process_msg(struct intel_guc *guc, 4716 const u32 *msg, 4717 u32 len) 4718 { 4719 struct intel_context *ce; 4720 unsigned long flags; 4721 u32 ctx_id; 4722 4723 if (unlikely(len < 2)) { 4724 guc_err(guc, "Invalid length %u\n", len); 4725 return -EPROTO; 4726 } 4727 ctx_id = msg[0]; 4728 4729 ce = g2h_context_lookup(guc, ctx_id); 4730 if (unlikely(!ce)) 4731 return -EPROTO; 4732 4733 if (unlikely(context_destroyed(ce) || 4734 (!context_pending_enable(ce) && 4735 !context_pending_disable(ce)))) { 4736 guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n", 4737 ce->guc_state.sched_state, ctx_id); 4738 return -EPROTO; 4739 } 4740 4741 trace_intel_context_sched_done(ce); 4742 4743 if (context_pending_enable(ce)) { 4744 #ifdef CONFIG_DRM_I915_SELFTEST 4745 if (unlikely(ce->drop_schedule_enable)) { 4746 ce->drop_schedule_enable = false; 4747 return 0; 4748 } 4749 #endif 4750 4751 spin_lock_irqsave(&ce->guc_state.lock, flags); 4752 clr_context_pending_enable(ce); 4753 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4754 } else if (context_pending_disable(ce)) { 4755 bool banned; 4756 4757 #ifdef CONFIG_DRM_I915_SELFTEST 4758 if (unlikely(ce->drop_schedule_disable)) { 4759 ce->drop_schedule_disable = false; 4760 return 0; 4761 } 4762 #endif 4763 4764 /* 4765 * Unpin must be done before __guc_signal_context_fence, 4766 * otherwise a race exists between the requests getting 4767 * submitted + retired before this unpin completes, resulting in 4768 * the pin_count going to zero and the context still being 4769 * enabled.
4770 */ 4771 intel_context_sched_disable_unpin(ce); 4772 4773 spin_lock_irqsave(&ce->guc_state.lock, flags); 4774 banned = context_banned(ce); 4775 clr_context_banned(ce); 4776 clr_context_pending_disable(ce); 4777 __guc_signal_context_fence(ce); 4778 guc_blocked_fence_complete(ce); 4779 spin_unlock_irqrestore(&ce->guc_state.lock, flags); 4780 4781 if (banned) { 4782 guc_cancel_context_requests(ce); 4783 intel_engine_signal_breadcrumbs(ce->engine); 4784 } 4785 } 4786 4787 decr_outstanding_submission_g2h(guc); 4788 intel_context_put(ce); 4789 4790 return 0; 4791 } 4792 4793 static void capture_error_state(struct intel_guc *guc, 4794 struct intel_context *ce) 4795 { 4796 struct intel_gt *gt = guc_to_gt(guc); 4797 struct drm_i915_private *i915 = gt->i915; 4798 intel_wakeref_t wakeref; 4799 intel_engine_mask_t engine_mask; 4800 4801 if (intel_engine_is_virtual(ce->engine)) { 4802 struct intel_engine_cs *e; 4803 intel_engine_mask_t tmp, virtual_mask = ce->engine->mask; 4804 4805 engine_mask = 0; 4806 for_each_engine_masked(e, ce->engine->gt, virtual_mask, tmp) { 4807 bool match = intel_guc_capture_is_matching_engine(gt, ce, e); 4808 4809 if (match) { 4810 intel_engine_set_hung_context(e, ce); 4811 engine_mask |= e->mask; 4812 i915_increase_reset_engine_count(&i915->gpu_error, 4813 e); 4814 } 4815 } 4816 4817 if (!engine_mask) { 4818 guc_warn(guc, "No matching physical engine capture for virtual engine context 0x%04X / %s", 4819 ce->guc_id.id, ce->engine->name); 4820 engine_mask = ~0U; 4821 } 4822 } else { 4823 intel_engine_set_hung_context(ce->engine, ce); 4824 engine_mask = ce->engine->mask; 4825 i915_increase_reset_engine_count(&i915->gpu_error, ce->engine); 4826 } 4827 4828 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 4829 i915_capture_error_state(gt, engine_mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE); 4830 } 4831 4832 static void guc_context_replay(struct intel_context *ce) 4833 { 4834 struct i915_sched_engine *sched_engine = ce->engine->sched_engine; 4835 4836 __guc_reset_context(ce, ce->engine->mask); 4837 tasklet_hi_schedule(&sched_engine->tasklet); 4838 } 4839 4840 static void guc_handle_context_reset(struct intel_guc *guc, 4841 struct intel_context *ce) 4842 { 4843 trace_intel_context_reset(ce); 4844 4845 guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", 4846 ce->guc_id.id, ce->engine->name, 4847 str_yes_no(intel_context_is_exiting(ce)), 4848 str_yes_no(intel_context_is_banned(ce))); 4849 4850 if (likely(intel_context_is_schedulable(ce))) { 4851 capture_error_state(guc, ce); 4852 guc_context_replay(ce); 4853 } else { 4854 guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s", 4855 ce->guc_id.id, ce->engine->name); 4856 } 4857 } 4858 4859 int intel_guc_context_reset_process_msg(struct intel_guc *guc, 4860 const u32 *msg, u32 len) 4861 { 4862 struct intel_context *ce; 4863 unsigned long flags; 4864 int ctx_id; 4865 4866 if (unlikely(len != 1)) { 4867 guc_err(guc, "Invalid length %u", len); 4868 return -EPROTO; 4869 } 4870 4871 ctx_id = msg[0]; 4872 4873 /* 4874 * The context lookup uses the xarray but lookups only require an RCU lock 4875 * not the full spinlock. So take the lock explicitly and keep it until the 4876 * context has been reference count locked to ensure it can't be destroyed 4877 * asynchronously until the reset is done. 
4878 */ 4879 xa_lock_irqsave(&guc->context_lookup, flags); 4880 ce = g2h_context_lookup(guc, ctx_id); 4881 if (ce) 4882 intel_context_get(ce); 4883 xa_unlock_irqrestore(&guc->context_lookup, flags); 4884 4885 if (unlikely(!ce)) 4886 return -EPROTO; 4887 4888 guc_handle_context_reset(guc, ce); 4889 intel_context_put(ce); 4890 4891 return 0; 4892 } 4893 4894 int intel_guc_error_capture_process_msg(struct intel_guc *guc, 4895 const u32 *msg, u32 len) 4896 { 4897 u32 status; 4898 4899 if (unlikely(len != 1)) { 4900 guc_dbg(guc, "Invalid length %u", len); 4901 return -EPROTO; 4902 } 4903 4904 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 4905 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) 4906 guc_warn(guc, "No space for error capture"); 4907 4908 intel_guc_capture_process(guc); 4909 4910 return 0; 4911 } 4912 4913 struct intel_engine_cs * 4914 intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance) 4915 { 4916 struct intel_gt *gt = guc_to_gt(guc); 4917 u8 engine_class = guc_class_to_engine_class(guc_class); 4918 4919 /* Class index is checked in class converter */ 4920 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE); 4921 4922 return gt->engine_class[engine_class][instance]; 4923 } 4924 4925 static void reset_fail_worker_func(struct work_struct *w) 4926 { 4927 struct intel_guc *guc = container_of(w, struct intel_guc, 4928 submission_state.reset_fail_worker); 4929 struct intel_gt *gt = guc_to_gt(guc); 4930 intel_engine_mask_t reset_fail_mask; 4931 unsigned long flags; 4932 4933 spin_lock_irqsave(&guc->submission_state.lock, flags); 4934 reset_fail_mask = guc->submission_state.reset_fail_mask; 4935 guc->submission_state.reset_fail_mask = 0; 4936 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4937 4938 if (likely(reset_fail_mask)) { 4939 struct intel_engine_cs *engine; 4940 enum intel_engine_id id; 4941 4942 /* 4943 * GuC is toast at this point - it dead loops after sending the failed 4944 * reset notification. So need to manually determine the guilty context. 4945 * Note that it should be reliable to do this here because the GuC is 4946 * toast and will not be scheduling behind the KMD's back. 4947 */ 4948 for_each_engine_masked(engine, gt, reset_fail_mask, id) 4949 intel_guc_find_hung_context(engine); 4950 4951 intel_gt_handle_error(gt, reset_fail_mask, 4952 I915_ERROR_CAPTURE, 4953 "GuC failed to reset engine mask=0x%x", 4954 reset_fail_mask); 4955 } 4956 } 4957 4958 int intel_guc_engine_failure_process_msg(struct intel_guc *guc, 4959 const u32 *msg, u32 len) 4960 { 4961 struct intel_engine_cs *engine; 4962 u8 guc_class, instance; 4963 u32 reason; 4964 unsigned long flags; 4965 4966 if (unlikely(len != 3)) { 4967 guc_err(guc, "Invalid length %u", len); 4968 return -EPROTO; 4969 } 4970 4971 guc_class = msg[0]; 4972 instance = msg[1]; 4973 reason = msg[2]; 4974 4975 engine = intel_guc_lookup_engine(guc, guc_class, instance); 4976 if (unlikely(!engine)) { 4977 guc_err(guc, "Invalid engine %d:%d", guc_class, instance); 4978 return -EPROTO; 4979 } 4980 4981 /* 4982 * This is an unexpected failure of a hardware feature. So, log a real 4983 * error message not just the informational that comes with the reset. 
4984 */ 4985 guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X", 4986 guc_class, instance, engine->name, reason); 4987 4988 spin_lock_irqsave(&guc->submission_state.lock, flags); 4989 guc->submission_state.reset_fail_mask |= engine->mask; 4990 spin_unlock_irqrestore(&guc->submission_state.lock, flags); 4991 4992 /* 4993 * A GT reset flushes this worker queue (G2H handler) so we must use 4994 * another worker to trigger a GT reset. 4995 */ 4996 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker); 4997 4998 return 0; 4999 } 5000 5001 void intel_guc_find_hung_context(struct intel_engine_cs *engine) 5002 { 5003 struct intel_guc *guc = &engine->gt->uc.guc; 5004 struct intel_context *ce; 5005 struct i915_request *rq; 5006 unsigned long index; 5007 unsigned long flags; 5008 5009 /* Reset called during driver load? GuC not yet initialised! */ 5010 if (unlikely(!guc_submission_initialized(guc))) 5011 return; 5012 5013 xa_lock_irqsave(&guc->context_lookup, flags); 5014 xa_for_each(&guc->context_lookup, index, ce) { 5015 bool found; 5016 5017 if (!kref_get_unless_zero(&ce->ref)) 5018 continue; 5019 5020 xa_unlock(&guc->context_lookup); 5021 5022 if (!intel_context_is_pinned(ce)) 5023 goto next; 5024 5025 if (intel_engine_is_virtual(ce->engine)) { 5026 if (!(ce->engine->mask & engine->mask)) 5027 goto next; 5028 } else { 5029 if (ce->engine != engine) 5030 goto next; 5031 } 5032 5033 found = false; 5034 spin_lock(&ce->guc_state.lock); 5035 list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { 5036 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) 5037 continue; 5038 5039 found = true; 5040 break; 5041 } 5042 spin_unlock(&ce->guc_state.lock); 5043 5044 if (found) { 5045 intel_engine_set_hung_context(engine, ce); 5046 5047 /* Can only cope with one hang at a time... */ 5048 intel_context_put(ce); 5049 xa_lock(&guc->context_lookup); 5050 goto done; 5051 } 5052 5053 next: 5054 intel_context_put(ce); 5055 xa_lock(&guc->context_lookup); 5056 } 5057 done: 5058 xa_unlock_irqrestore(&guc->context_lookup, flags); 5059 } 5060 5061 void intel_guc_dump_active_requests(struct intel_engine_cs *engine, 5062 struct i915_request *hung_rq, 5063 struct drm_printer *m) 5064 { 5065 struct intel_guc *guc = &engine->gt->uc.guc; 5066 struct intel_context *ce; 5067 unsigned long index; 5068 unsigned long flags; 5069 5070 /* Reset called during driver load? GuC not yet initialised! 
*/ 5071 if (unlikely(!guc_submission_initialized(guc))) 5072 return; 5073 5074 xa_lock_irqsave(&guc->context_lookup, flags); 5075 xa_for_each(&guc->context_lookup, index, ce) { 5076 if (!kref_get_unless_zero(&ce->ref)) 5077 continue; 5078 5079 xa_unlock(&guc->context_lookup); 5080 5081 if (!intel_context_is_pinned(ce)) 5082 goto next; 5083 5084 if (intel_engine_is_virtual(ce->engine)) { 5085 if (!(ce->engine->mask & engine->mask)) 5086 goto next; 5087 } else { 5088 if (ce->engine != engine) 5089 goto next; 5090 } 5091 5092 spin_lock(&ce->guc_state.lock); 5093 intel_engine_dump_active_requests(&ce->guc_state.requests, 5094 hung_rq, m); 5095 spin_unlock(&ce->guc_state.lock); 5096 5097 next: 5098 intel_context_put(ce); 5099 xa_lock(&guc->context_lookup); 5100 } 5101 xa_unlock_irqrestore(&guc->context_lookup, flags); 5102 } 5103 5104 void intel_guc_submission_print_info(struct intel_guc *guc, 5105 struct drm_printer *p) 5106 { 5107 struct i915_sched_engine *sched_engine = guc->sched_engine; 5108 struct rb_node *rb; 5109 unsigned long flags; 5110 5111 if (!sched_engine) 5112 return; 5113 5114 drm_printf(p, "GuC Submission API Version: %d.%d.%d\n", 5115 guc->submission_version.major, guc->submission_version.minor, 5116 guc->submission_version.patch); 5117 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n", 5118 atomic_read(&guc->outstanding_submission_g2h)); 5119 drm_printf(p, "GuC tasklet count: %u\n", 5120 atomic_read(&sched_engine->tasklet.count)); 5121 5122 spin_lock_irqsave(&sched_engine->lock, flags); 5123 drm_printf(p, "Requests in GuC submit tasklet:\n"); 5124 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) { 5125 struct i915_priolist *pl = to_priolist(rb); 5126 struct i915_request *rq; 5127 5128 priolist_for_each_request(rq, pl) 5129 drm_printf(p, "guc_id=%u, seqno=%llu\n", 5130 rq->context->guc_id.id, 5131 rq->fence.seqno); 5132 } 5133 spin_unlock_irqrestore(&sched_engine->lock, flags); 5134 drm_printf(p, "\n"); 5135 } 5136 5137 static inline void guc_log_context_priority(struct drm_printer *p, 5138 struct intel_context *ce) 5139 { 5140 int i; 5141 5142 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); 5143 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); 5144 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; 5145 i < GUC_CLIENT_PRIORITY_NUM; ++i) { 5146 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", 5147 i, ce->guc_state.prio_count[i]); 5148 } 5149 drm_printf(p, "\n"); 5150 } 5151 5152 static inline void guc_log_context(struct drm_printer *p, 5153 struct intel_context *ce) 5154 { 5155 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); 5156 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); 5157 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", 5158 ce->ring->head, 5159 ce->lrc_reg_state[CTX_RING_HEAD]); 5160 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", 5161 ce->ring->tail, 5162 ce->lrc_reg_state[CTX_RING_TAIL]); 5163 drm_printf(p, "\t\tContext Pin Count: %u\n", 5164 atomic_read(&ce->pin_count)); 5165 drm_printf(p, "\t\tGuC ID Ref Count: %u\n", 5166 atomic_read(&ce->guc_id.ref)); 5167 drm_printf(p, "\t\tSchedule State: 0x%x\n", 5168 ce->guc_state.sched_state); 5169 } 5170 5171 void intel_guc_submission_print_context_info(struct intel_guc *guc, 5172 struct drm_printer *p) 5173 { 5174 struct intel_context *ce; 5175 unsigned long index; 5176 unsigned long flags; 5177 5178 xa_lock_irqsave(&guc->context_lookup, flags); 5179 xa_for_each(&guc->context_lookup, index, ce) { 5180 
GEM_BUG_ON(intel_context_is_child(ce)); 5181 5182 guc_log_context(p, ce); 5183 guc_log_context_priority(p, ce); 5184 5185 if (intel_context_is_parent(ce)) { 5186 struct intel_context *child; 5187 5188 drm_printf(p, "\t\tNumber children: %u\n", 5189 ce->parallel.number_children); 5190 5191 if (ce->parallel.guc.wq_status) { 5192 drm_printf(p, "\t\tWQI Head: %u\n", 5193 READ_ONCE(*ce->parallel.guc.wq_head)); 5194 drm_printf(p, "\t\tWQI Tail: %u\n", 5195 READ_ONCE(*ce->parallel.guc.wq_tail)); 5196 drm_printf(p, "\t\tWQI Status: %u\n", 5197 READ_ONCE(*ce->parallel.guc.wq_status)); 5198 } 5199 5200 if (ce->engine->emit_bb_start == 5201 emit_bb_start_parent_no_preempt_mid_batch) { 5202 u8 i; 5203 5204 drm_printf(p, "\t\tChildren Go: %u\n", 5205 get_children_go_value(ce)); 5206 for (i = 0; i < ce->parallel.number_children; ++i) 5207 drm_printf(p, "\t\tChildren Join: %u\n", 5208 get_children_join_value(ce, i)); 5209 } 5210 5211 for_each_child(ce, child) 5212 guc_log_context(p, child); 5213 } 5214 } 5215 xa_unlock_irqrestore(&guc->context_lookup, flags); 5216 } 5217 5218 static inline u32 get_children_go_addr(struct intel_context *ce) 5219 { 5220 GEM_BUG_ON(!intel_context_is_parent(ce)); 5221 5222 return i915_ggtt_offset(ce->state) + 5223 __get_parent_scratch_offset(ce) + 5224 offsetof(struct parent_scratch, go.semaphore); 5225 } 5226 5227 static inline u32 get_children_join_addr(struct intel_context *ce, 5228 u8 child_index) 5229 { 5230 GEM_BUG_ON(!intel_context_is_parent(ce)); 5231 5232 return i915_ggtt_offset(ce->state) + 5233 __get_parent_scratch_offset(ce) + 5234 offsetof(struct parent_scratch, join[child_index].semaphore); 5235 } 5236 5237 #define PARENT_GO_BB 1 5238 #define PARENT_GO_FINI_BREADCRUMB 0 5239 #define CHILD_GO_BB 1 5240 #define CHILD_GO_FINI_BREADCRUMB 0 5241 static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq, 5242 u64 offset, u32 len, 5243 const unsigned int flags) 5244 { 5245 struct intel_context *ce = rq->context; 5246 u32 *cs; 5247 u8 i; 5248 5249 GEM_BUG_ON(!intel_context_is_parent(ce)); 5250 5251 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children); 5252 if (IS_ERR(cs)) 5253 return PTR_ERR(cs); 5254 5255 /* Wait on children */ 5256 for (i = 0; i < ce->parallel.number_children; ++i) { 5257 *cs++ = (MI_SEMAPHORE_WAIT | 5258 MI_SEMAPHORE_GLOBAL_GTT | 5259 MI_SEMAPHORE_POLL | 5260 MI_SEMAPHORE_SAD_EQ_SDD); 5261 *cs++ = PARENT_GO_BB; 5262 *cs++ = get_children_join_addr(ce, i); 5263 *cs++ = 0; 5264 } 5265 5266 /* Turn off preemption */ 5267 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5268 *cs++ = MI_NOOP; 5269 5270 /* Tell children go */ 5271 cs = gen8_emit_ggtt_write(cs, 5272 CHILD_GO_BB, 5273 get_children_go_addr(ce), 5274 0); 5275 5276 /* Jump to batch */ 5277 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5278 (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); 5279 *cs++ = lower_32_bits(offset); 5280 *cs++ = upper_32_bits(offset); 5281 *cs++ = MI_NOOP; 5282 5283 intel_ring_advance(rq, cs); 5284 5285 return 0; 5286 } 5287 5288 static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq, 5289 u64 offset, u32 len, 5290 const unsigned int flags) 5291 { 5292 struct intel_context *ce = rq->context; 5293 struct intel_context *parent = intel_context_to_parent(ce); 5294 u32 *cs; 5295 5296 GEM_BUG_ON(!intel_context_is_child(ce)); 5297 5298 cs = intel_ring_begin(rq, 12); 5299 if (IS_ERR(cs)) 5300 return PTR_ERR(cs); 5301 5302 /* Signal parent */ 5303 cs = gen8_emit_ggtt_write(cs, 5304 PARENT_GO_BB, 5305 get_children_join_addr(parent, 5306 ce->parallel.child_index), 5307 0); 5308 5309 /* Wait on parent for go */ 5310 *cs++ = (MI_SEMAPHORE_WAIT | 5311 MI_SEMAPHORE_GLOBAL_GTT | 5312 MI_SEMAPHORE_POLL | 5313 MI_SEMAPHORE_SAD_EQ_SDD); 5314 *cs++ = CHILD_GO_BB; 5315 *cs++ = get_children_go_addr(parent); 5316 *cs++ = 0; 5317 5318 /* Turn off preemption */ 5319 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; 5320 5321 /* Jump to batch */ 5322 *cs++ = MI_BATCH_BUFFER_START_GEN8 | 5323 (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); 5324 *cs++ = lower_32_bits(offset); 5325 *cs++ = upper_32_bits(offset); 5326 5327 intel_ring_advance(rq, cs); 5328 5329 return 0; 5330 } 5331 5332 static u32 * 5333 __emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5334 u32 *cs) 5335 { 5336 struct intel_context *ce = rq->context; 5337 u8 i; 5338 5339 GEM_BUG_ON(!intel_context_is_parent(ce)); 5340 5341 /* Wait on children */ 5342 for (i = 0; i < ce->parallel.number_children; ++i) { 5343 *cs++ = (MI_SEMAPHORE_WAIT | 5344 MI_SEMAPHORE_GLOBAL_GTT | 5345 MI_SEMAPHORE_POLL | 5346 MI_SEMAPHORE_SAD_EQ_SDD); 5347 *cs++ = PARENT_GO_FINI_BREADCRUMB; 5348 *cs++ = get_children_join_addr(ce, i); 5349 *cs++ = 0; 5350 } 5351 5352 /* Turn on preemption */ 5353 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5354 *cs++ = MI_NOOP; 5355 5356 /* Tell children go */ 5357 cs = gen8_emit_ggtt_write(cs, 5358 CHILD_GO_FINI_BREADCRUMB, 5359 get_children_go_addr(ce), 5360 0); 5361 5362 return cs; 5363 } 5364 5365 /* 5366 * If this is true, a submission of multi-lrc requests had an error and the 5367 * requests need to be skipped. The front end (execbuf IOCTL) should've called 5368 * i915_request_skip which squashes the BB but we still need to emit the fini 5369 * breadcrumb seqno write. At this point we don't know how many of the 5370 * requests in the multi-lrc submission were generated so we can't do the 5371 * handshake between the parent and children (e.g. if 4 requests should be 5372 * generated but the 2nd hit an error only 1 would be seen by the GuC backend). 5373 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error 5374 * has occurred on any of the requests in submission / relationship.
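*
* Note that even on this error path the seqno write and user interrupt
* are still emitted: the emitters below must consume exactly
* emit_fini_breadcrumb_dw dwords (the GEM_BUG_ONs at their ends insist
* on it), which is why the skipped handshake is overwritten with
* MI_NOOPs (the memset to 0) rather than simply left out.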
5375 */ 5376 static inline bool skip_handshake(struct i915_request *rq) 5377 { 5378 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags); 5379 } 5380 5381 #define NON_SKIP_LEN 6 5382 static u32 * 5383 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq, 5384 u32 *cs) 5385 { 5386 struct intel_context *ce = rq->context; 5387 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5388 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5389 5390 GEM_BUG_ON(!intel_context_is_parent(ce)); 5391 5392 if (unlikely(skip_handshake(rq))) { 5393 /* 5394 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch, 5395 * the NON_SKIP_LEN comes from the length of the emits below. 5396 */ 5397 memset(cs, 0, sizeof(u32) * 5398 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5399 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5400 } else { 5401 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs); 5402 } 5403 5404 /* Emit fini breadcrumb */ 5405 before_fini_breadcrumb_user_interrupt_cs = cs; 5406 cs = gen8_emit_ggtt_write(cs, 5407 rq->fence.seqno, 5408 i915_request_active_timeline(rq)->hwsp_offset, 5409 0); 5410 5411 /* User interrupt */ 5412 *cs++ = MI_USER_INTERRUPT; 5413 *cs++ = MI_NOOP; 5414 5415 /* Ensure our math for skip + emit is correct */ 5416 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5417 cs); 5418 GEM_BUG_ON(start_fini_breadcrumb_cs + 5419 ce->engine->emit_fini_breadcrumb_dw != cs); 5420 5421 rq->tail = intel_ring_offset(rq, cs); 5422 5423 return cs; 5424 } 5425 5426 static u32 * 5427 __emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5428 u32 *cs) 5429 { 5430 struct intel_context *ce = rq->context; 5431 struct intel_context *parent = intel_context_to_parent(ce); 5432 5433 GEM_BUG_ON(!intel_context_is_child(ce)); 5434 5435 /* Turn on preemption */ 5436 *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; 5437 *cs++ = MI_NOOP; 5438 5439 /* Signal parent */ 5440 cs = gen8_emit_ggtt_write(cs, 5441 PARENT_GO_FINI_BREADCRUMB, 5442 get_children_join_addr(parent, 5443 ce->parallel.child_index), 5444 0); 5445 5446 /* Wait parent on for go */ 5447 *cs++ = (MI_SEMAPHORE_WAIT | 5448 MI_SEMAPHORE_GLOBAL_GTT | 5449 MI_SEMAPHORE_POLL | 5450 MI_SEMAPHORE_SAD_EQ_SDD); 5451 *cs++ = CHILD_GO_FINI_BREADCRUMB; 5452 *cs++ = get_children_go_addr(parent); 5453 *cs++ = 0; 5454 5455 return cs; 5456 } 5457 5458 static u32 * 5459 emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq, 5460 u32 *cs) 5461 { 5462 struct intel_context *ce = rq->context; 5463 __maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs; 5464 __maybe_unused u32 *start_fini_breadcrumb_cs = cs; 5465 5466 GEM_BUG_ON(!intel_context_is_child(ce)); 5467 5468 if (unlikely(skip_handshake(rq))) { 5469 /* 5470 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch, 5471 * the NON_SKIP_LEN comes from the length of the emits below. 
5472 */ 5473 memset(cs, 0, sizeof(u32) * 5474 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN)); 5475 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN; 5476 } else { 5477 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs); 5478 } 5479 5480 /* Emit fini breadcrumb */ 5481 before_fini_breadcrumb_user_interrupt_cs = cs; 5482 cs = gen8_emit_ggtt_write(cs, 5483 rq->fence.seqno, 5484 i915_request_active_timeline(rq)->hwsp_offset, 5485 0); 5486 5487 /* User interrupt */ 5488 *cs++ = MI_USER_INTERRUPT; 5489 *cs++ = MI_NOOP; 5490 5491 /* Ensure our math for skip + emit is correct */ 5492 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN != 5493 cs); 5494 GEM_BUG_ON(start_fini_breadcrumb_cs + 5495 ce->engine->emit_fini_breadcrumb_dw != cs); 5496 5497 rq->tail = intel_ring_offset(rq, cs); 5498 5499 return cs; 5500 } 5501 5502 #undef NON_SKIP_LEN 5503 5504 static struct intel_context * 5505 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count, 5506 unsigned long flags) 5507 { 5508 struct guc_virtual_engine *ve; 5509 struct intel_guc *guc; 5510 unsigned int n; 5511 int err; 5512 5513 ve = kzalloc(sizeof(*ve), GFP_KERNEL); 5514 if (!ve) 5515 return ERR_PTR(-ENOMEM); 5516 5517 guc = &siblings[0]->gt->uc.guc; 5518 5519 ve->base.i915 = siblings[0]->i915; 5520 ve->base.gt = siblings[0]->gt; 5521 ve->base.uncore = siblings[0]->uncore; 5522 ve->base.id = -1; 5523 5524 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; 5525 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5526 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; 5527 ve->base.saturated = ALL_ENGINES; 5528 5529 snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); 5530 5531 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine); 5532 5533 ve->base.cops = &virtual_guc_context_ops; 5534 ve->base.request_alloc = guc_request_alloc; 5535 ve->base.bump_serial = virtual_guc_bump_serial; 5536 5537 ve->base.submit_request = guc_submit_request; 5538 5539 ve->base.flags = I915_ENGINE_IS_VIRTUAL; 5540 5541 #ifdef notyet 5542 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES); 5543 #endif 5544 ve->base.mask = VIRTUAL_ENGINES; 5545 5546 intel_context_init(&ve->context, &ve->base); 5547 5548 for (n = 0; n < count; n++) { 5549 struct intel_engine_cs *sibling = siblings[n]; 5550 5551 GEM_BUG_ON(!is_power_of_2(sibling->mask)); 5552 if (sibling->mask & ve->base.mask) { 5553 guc_dbg(guc, "duplicate %s entry in load balancer\n", 5554 sibling->name); 5555 err = -EINVAL; 5556 goto err_put; 5557 } 5558 5559 ve->base.mask |= sibling->mask; 5560 ve->base.logical_mask |= sibling->logical_mask; 5561 5562 if (n != 0 && ve->base.class != sibling->class) { 5563 guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n", 5564 sibling->class, ve->base.class); 5565 err = -EINVAL; 5566 goto err_put; 5567 } else if (n == 0) { 5568 ve->base.class = sibling->class; 5569 ve->base.uabi_class = sibling->uabi_class; 5570 snprintf(ve->base.name, sizeof(ve->base.name), 5571 "v%dx%d", ve->base.class, count); 5572 ve->base.context_size = sibling->context_size; 5573 5574 ve->base.add_active_request = 5575 sibling->add_active_request; 5576 ve->base.remove_active_request = 5577 sibling->remove_active_request; 5578 ve->base.emit_bb_start = sibling->emit_bb_start; 5579 ve->base.emit_flush = sibling->emit_flush; 5580 ve->base.emit_init_breadcrumb = 5581 sibling->emit_init_breadcrumb; 5582 ve->base.emit_fini_breadcrumb = 5583 sibling->emit_fini_breadcrumb; 5584 ve->base.emit_fini_breadcrumb_dw 
= 5585 sibling->emit_fini_breadcrumb_dw; 5586 ve->base.breadcrumbs = 5587 intel_breadcrumbs_get(sibling->breadcrumbs); 5588 5589 ve->base.flags |= sibling->flags; 5590 5591 ve->base.props.timeslice_duration_ms = 5592 sibling->props.timeslice_duration_ms; 5593 ve->base.props.preempt_timeout_ms = 5594 sibling->props.preempt_timeout_ms; 5595 } 5596 } 5597 5598 return &ve->context; 5599 5600 err_put: 5601 intel_context_put(&ve->context); 5602 return ERR_PTR(err); 5603 } 5604 5605 bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) 5606 { 5607 struct intel_engine_cs *engine; 5608 intel_engine_mask_t tmp, mask = ve->mask; 5609 5610 for_each_engine_masked(engine, ve->gt, mask, tmp) 5611 if (READ_ONCE(engine->props.heartbeat_interval_ms)) 5612 return true; 5613 5614 return false; 5615 } 5616 5617 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 5618 #include "selftest_guc.c" 5619 #include "selftest_guc_multi_lrc.c" 5620 #include "selftest_guc_hangcheck.c" 5621 #endif 5622