// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

#include "i915_drv.h"
#include "i915_trace.h"

#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_ring.h"

#ifdef __linux__
static struct kmem_cache *slab_ce;
#else
static struct pool slab_ce;
#endif

static struct intel_context *intel_context_alloc(void)
{
#ifdef __linux__
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
#else
	return pool_get(&slab_ce, PR_WAITOK | PR_ZERO);
#endif
}

static void rcu_context_free(struct rcu_head *rcu)
{
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

	trace_intel_context_free(ce);
#ifdef __linux__
	kmem_cache_free(slab_ce, ce);
#else
	pool_put(&slab_ce, ce);
#endif
}

void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

struct intel_context *
intel_context_create(struct intel_engine_cs *engine)
{
	struct intel_context *ce;

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

	intel_context_init(ce, engine);
	trace_intel_context_create(ce);
	return ce;
}

int intel_context_alloc_state(struct intel_context *ce)
{
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}
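
/*
 * Usage sketch (illustrative only, not part of the driver): backend state
 * is allocated lazily on first pin.  Callers never invoke
 * intel_context_alloc_state() directly; __intel_context_do_pin() calls it
 * while CONTEXT_ALLOC_BIT is still clear.  A typical lifecycle, assuming
 * only the public helpers from intel_context.h, looks like:
 *
 *	struct intel_context *ce;
 *	int err;
 *
 *	ce = intel_context_create(engine);
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 *
 *	err = intel_context_pin(ce);	// first pin allocates ce->state
 *	if (err)
 *		goto out_put;
 *
 *	// ... submit work on ce ...
 *
 *	intel_context_unpin(ce);
 * out_put:
 *	intel_context_put(ce);
 *	return err;
 */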

static int intel_context_active_acquire(struct intel_context *ce)
{
	int err;

	__i915_active_acquire(&ce->active);

	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
	    intel_context_is_parallel(ce))
		return 0;

	/* Preallocate tracking nodes */
	err = i915_active_acquire_preallocate_barrier(&ce->active,
						      ce->engine);
	if (err)
		i915_active_release(&ce->active);

	return err;
}

static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
	int err;

	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
	if (err)
		return err;

	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
	i915_vma_make_unshrinkable(vma);
	vma->obj->mm.dirty = true;

	return 0;

err_unpin:
	i915_vma_unpin(vma);
	return err;
}

static void __context_unpin_state(struct i915_vma *vma)
{
	i915_vma_make_shrinkable(vma);
	i915_active_release(&vma->active);
	__i915_vma_unpin(vma);
}

static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
{
	int err;

	err = intel_ring_pin(ring, ww);
	if (err)
		return err;

	err = i915_active_acquire(&ring->vma->active);
	if (err)
		goto err_pin;

	return 0;

err_pin:
	intel_ring_unpin(ring);
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
	intel_ring_unpin(ring);
}

static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int err;

	CE_TRACE(ce, "active\n");

	err = __ring_active(ce->ring, ww);
	if (err)
		return err;

	err = intel_timeline_pin(ce->timeline, ww);
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

	err = __context_pin_state(ce->state, ww);
	if (err)
		goto err_timeline;

	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

static void intel_context_post_unpin(struct intel_context *ce)
{
	if (ce->state)
		__context_unpin_state(ce->state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevents a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
	if (err)
		return err;

	err = ce->ops->pre_pin(ce, ww, &vaddr);
	if (err)
		goto err_ctx_unpin;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_post_unpin;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_release;

	intel_engine_pm_might_get(ce->engine);

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

	trace_intel_context_do_pin(ce);

err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_release:
	i915_active_release(&ce->active);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_ctx_unpin:
	intel_context_post_unpin(ce);

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

	return err;
}

int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = __intel_context_do_pin_ww(ce, &ww);
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
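
/*
 * Usage sketch (illustrative only): callers that already operate under a
 * struct i915_gem_ww_ctx acquire context use intel_context_pin_ww() and
 * drive the -EDEADLK backoff themselves, mirroring the loop above (and the
 * one in intel_context_create_request() below):
 *
 *	struct i915_gem_ww_ctx ww;
 *	int err;
 *
 *	i915_gem_ww_ctx_init(&ww, true);
 * retry:
 *	err = intel_context_pin_ww(ce, &ww);
 *	if (err == -EDEADLK) {
 *		err = i915_gem_ww_ctx_backoff(&ww);
 *		if (!err)
 *			goto retry;
 *	}
 *	if (!err) {
 *		// ... use the pinned context ...
 *		intel_context_unpin(ce);
 *	}
 *	i915_gem_ww_ctx_fini(&ww);
 */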

void __intel_context_do_unpin(struct intel_context *ce, int sub)
{
	if (!atomic_sub_and_test(sub, &ce->pin_count))
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra now so that it is not freed before we finish
	 * dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
	trace_intel_context_do_unpin(ce);
	intel_context_put(ce);
}

static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	intel_context_post_unpin(ce);
	intel_context_put(ce);
}

static int __intel_context_active(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	intel_context_get(ce);

	/* everything should already be activated by intel_context_pre_pin() */
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);

	__intel_timeline_pin(ce->timeline);

	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}

	return 0;
}

static int
sw_fence_dummy_notify(struct i915_sw_fence *sf,
		      enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

void
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!engine->cops);
	GEM_BUG_ON(!engine->gt->vm);

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	/* NB ce->signal_link/lock is used under RCU */
	mtx_init(&ce->signal_lock, IPL_NONE);
	INIT_LIST_HEAD(&ce->signals);

	rw_init(&ce->pin_mutex, "cepin");

	mtx_init(&ce->guc_state.lock, IPL_TTY);
	INIT_LIST_HEAD(&ce->guc_state.fences);
	INIT_LIST_HEAD(&ce->guc_state.requests);

	ce->guc_id.id = GUC_INVALID_CONTEXT_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);

	INIT_LIST_HEAD(&ce->destroyed_link);

	INIT_LIST_HEAD(&ce->parallel.child_list);

	/*
	 * Initialize fence to be complete as this is expected to be complete
	 * unless there is a pending schedule disable outstanding.
	 */
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);

	i915_active_init(&ce->active,
			 __intel_context_active, __intel_context_retire, 0);
}

void intel_context_fini(struct intel_context *ce)
{
	struct intel_context *child, *next;

	if (ce->timeline)
		intel_timeline_put(ce->timeline);
	i915_vm_put(ce->vm);

	/* Need to put the creation ref for the children */
	if (intel_context_is_parent(ce))
		for_each_child_safe(ce, child, next)
			intel_context_put(child);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
	i915_sw_fence_fini(&ce->guc_state.blocked);
}

void i915_context_module_exit(void)
{
#ifdef __linux__
	kmem_cache_destroy(slab_ce);
#else
	pool_destroy(&slab_ce);
#endif
}

int __init i915_context_module_init(void)
{
#ifdef __linux__
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
	if (!slab_ce)
		return -ENOMEM;
#else
	pool_init(&slab_ce, sizeof(struct intel_context),
	    CACHELINESIZE, IPL_TTY, 0, "ictx", NULL);
#endif

	return 0;
}

void intel_context_enter_engine(struct intel_context *ce)
{
	intel_engine_pm_get(ce->engine);
	intel_timeline_enter(ce->timeline);
}

void intel_context_exit_engine(struct intel_context *ce)
{
	intel_timeline_exit(ce->timeline);
	intel_engine_pm_put(ce->engine);
}

int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
	struct intel_timeline *tl = ce->timeline;
	int err;

	/* Only suitable for use in remotely modifying this context */
	GEM_BUG_ON(rq->context == ce);

	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
		/* Queue this switch after current activity by this context. */
		err = i915_active_fence_set(&tl->last_request, rq);
		if (err)
			return err;
	}

	/*
	 * Guarantee that the context image and the timeline remain pinned
	 * until the modifying request is retired, by setting the ce activity
	 * tracker.
	 *
	 * But we only need to take one pin on their account. In other words,
	 * transfer the pinned ce object to the tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
	return i915_active_add_request(&ce->active, rq);
}

struct i915_request *intel_context_create_request(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
		rq = ERR_PTR(err);
	} else {
		rq = ERR_PTR(err);
	}

	i915_gem_ww_ctx_fini(&ww);

	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as the outer
	 * lock here. Hack around this to shut up lockdep in selftests.
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

	return rq;
}
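
/*
 * Usage sketch (illustrative only): a caller typically emits the request
 * returned above with i915_request_add() and may wait on it, in the style
 * of the selftests:
 *
 *	struct i915_request *rq;
 *	int err = 0;
 *
 *	rq = intel_context_create_request(ce);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	// ... emit commands into the ring via rq ...
 *
 *	i915_request_get(rq);
 *	i915_request_add(rq);
 *	if (i915_request_wait(rq, 0, HZ) < 0)
 *		err = -ETIME;
 *	i915_request_put(rq);
 *	return err;
 */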

struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
	struct intel_context *parent = intel_context_to_parent(ce);
	struct i915_request *rq, *active = NULL;
	unsigned long flags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	/*
	 * We search the parent list to find an active request on the submitted
	 * context. The parent list contains the requests for all the contexts
	 * in the relationship so we have to do a compare of each request's
	 * context.
	 */
	spin_lock_irqsave(&parent->guc_state.lock, flags);
	list_for_each_entry_reverse(rq, &parent->guc_state.requests,
				    sched.link) {
		if (rq->context != ce)
			continue;
		if (i915_request_completed(rq))
			break;

		active = rq;
	}
	if (active)
		active = i915_request_get_rcu(active);
	spin_unlock_irqrestore(&parent->guc_state.lock, flags);

	return active;
}

void intel_context_bind_parent_child(struct intel_context *parent,
				     struct intel_context *child)
{
	/*
	 * It is the callers' responsibility to validate that this function is
	 * used correctly, but we use GEM_BUG_ON here to ensure that they do.
	 */
	GEM_BUG_ON(intel_context_is_pinned(parent));
	GEM_BUG_ON(intel_context_is_child(parent));
	GEM_BUG_ON(intel_context_is_pinned(child));
	GEM_BUG_ON(intel_context_is_child(child));
	GEM_BUG_ON(intel_context_is_parent(child));

	child->parallel.child_index = parent->parallel.number_children++;
	list_add_tail(&child->parallel.child_link,
		      &parent->parallel.child_list);
	child->parallel.parent = parent;
}

u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	u64 total, active;

	if (ce->ops->update_stats)
		ce->ops->update_stats(ce);

	total = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		total *= ce->engine->gt->clock_period_ns;

	active = READ_ONCE(ce->stats.active);
	if (active)
		active = intel_context_clock() - active;

	return total + active;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	u64 avg = ewma_runtime_read(&ce->stats.runtime.avg);

	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		avg *= ce->engine->gt->clock_period_ns;

	return avg;
}
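
/*
 * Worked example (illustrative only): when the backend reports runtime in
 * command-streamer timestamp cycles (COPS_RUNTIME_CYCLES), the two helpers
 * above scale the raw value by gt->clock_period_ns.  Assuming, say, a
 * 19.2 MHz timestamp clock, clock_period_ns is roughly 52, so a context
 * that accumulated 1,000,000 cycles of runtime reports about 52 ms:
 *
 *	u64 ns = intel_context_get_total_runtime_ns(ce);
 *
 *	drm_dbg(&ce->engine->i915->drm,
 *		"context ran for %llu.%03llu ms\n",
 *		ns / NSEC_PER_MSEC, (ns % NSEC_PER_MSEC) / NSEC_PER_USEC);
 */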
539 */ 540 lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie); 541 mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_); 542 mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); 543 rq->cookie = lockdep_pin_lock(&ce->timeline->mutex); 544 545 return rq; 546 } 547 548 struct i915_request *intel_context_get_active_request(struct intel_context *ce) 549 { 550 struct intel_context *parent = intel_context_to_parent(ce); 551 struct i915_request *rq, *active = NULL; 552 unsigned long flags; 553 554 GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); 555 556 /* 557 * We search the parent list to find an active request on the submitted 558 * context. The parent list contains the requests for all the contexts 559 * in the relationship so we have to do a compare of each request's 560 * context. 561 */ 562 spin_lock_irqsave(&parent->guc_state.lock, flags); 563 list_for_each_entry_reverse(rq, &parent->guc_state.requests, 564 sched.link) { 565 if (rq->context != ce) 566 continue; 567 if (i915_request_completed(rq)) 568 break; 569 570 active = rq; 571 } 572 if (active) 573 active = i915_request_get_rcu(active); 574 spin_unlock_irqrestore(&parent->guc_state.lock, flags); 575 576 return active; 577 } 578 579 void intel_context_bind_parent_child(struct intel_context *parent, 580 struct intel_context *child) 581 { 582 /* 583 * Callers responsibility to validate that this function is used 584 * correctly but we use GEM_BUG_ON here ensure that they do. 585 */ 586 GEM_BUG_ON(intel_context_is_pinned(parent)); 587 GEM_BUG_ON(intel_context_is_child(parent)); 588 GEM_BUG_ON(intel_context_is_pinned(child)); 589 GEM_BUG_ON(intel_context_is_child(child)); 590 GEM_BUG_ON(intel_context_is_parent(child)); 591 592 parent->parallel.child_index = parent->parallel.number_children++; 593 list_add_tail(&child->parallel.child_link, 594 &parent->parallel.child_list); 595 child->parallel.parent = parent; 596 } 597 598 u64 intel_context_get_total_runtime_ns(struct intel_context *ce) 599 { 600 u64 total, active; 601 602 if (ce->ops->update_stats) 603 ce->ops->update_stats(ce); 604 605 total = ce->stats.runtime.total; 606 if (ce->ops->flags & COPS_RUNTIME_CYCLES) 607 total *= ce->engine->gt->clock_period_ns; 608 609 active = READ_ONCE(ce->stats.active); 610 if (active) 611 active = intel_context_clock() - active; 612 613 return total + active; 614 } 615 616 u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) 617 { 618 u64 avg = ewma_runtime_read(&ce->stats.runtime.avg); 619 620 if (ce->ops->flags & COPS_RUNTIME_CYCLES) 621 avg *= ce->engine->gt->clock_period_ns; 622 623 return avg; 624 } 625 626 bool intel_context_ban(struct intel_context *ce, struct i915_request *rq) 627 { 628 bool ret = intel_context_set_banned(ce); 629 630 trace_intel_context_ban(ce); 631 632 if (ce->ops->revoke) 633 ce->ops->revoke(ce, rq, 634 INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS); 635 636 return ret; 637 } 638 639 bool intel_context_revoke(struct intel_context *ce) 640 { 641 bool ret = intel_context_set_exiting(ce); 642 643 if (ce->ops->revoke) 644 ce->ops->revoke(ce, NULL, ce->engine->props.preempt_timeout_ms); 645 646 return ret; 647 } 648 649 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 650 #include "selftest_context.c" 651 #endif 652