/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of
 * an opaque GPU object which is referenced at times of context saves and
 * restores. With RC6 enabled, the context is also referenced as the GPU enters
 * and exits RC6 (the GPU has its own internal power context, except on gen5).
 * Though something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU
 * state. The default context only exists to give the GPU some offset to load
 * as the current context, so that a save of the context we actually care about
 * can be invoked. In fact, the code could likely be constructed, albeit in a
 * more complicated fashion, to never use the default context, though that
 * limits the driver's ability to swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These
 * contexts store GPU state, and thus allow GPU clients to avoid re-emitting
 * state (and potentially to query certain state) at any time. The kernel
 * driver makes certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
 *                                         refcount     pincount     active
 * S0: initial state                          0            0           0
 * S1: context created                        1            0           0
 * S2: context is currently running           2            1           X
 * S3: GPU referenced, but not current        2            0           1
 * S4: context is current, but destroyed      1            1           0
 * S5: like S3, but destroyed                 1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits an execbuf with its own context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and
 *  is on the active list waiting for the next context switch to occur. Until
 *  this happens, the object must remain at the same gtt offset. It is
 *  therefore possible to destroy a context while it is still active.
 *
 */
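
/*
 * Illustrative walk through the state machine above: a client that creates a
 * context, submits a single execbuf with it, and then destroys it after
 * another context has become current goes S0->S1 (create), S1->S2 (execbuf),
 * S2->S3 (another context is switched in), S3->S5 (destroy while still
 * active) and finally S5->S0 once the context object is retired and the last
 * reference is dropped.
 */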
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"

#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)

/* This is a HW constraint. The value below is the largest known requirement
 * I've seen in a spec to date, and that was a workaround for a non-shipping
 * part. It should be safe to decrease this, but it's more future-proof as is.
 */
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN 4096

static size_t get_context_alignment(struct drm_i915_private *dev_priv)
{
	if (IS_GEN6(dev_priv))
		return GEN6_CONTEXT_ALIGN;

	return GEN7_CONTEXT_ALIGN;
}

static int get_context_size(struct drm_i915_private *dev_priv)
{
	int ret;
	u32 reg;

	switch (INTEL_GEN(dev_priv)) {
	case 6:
		reg = I915_READ(CXT_SIZE);
		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 7:
		reg = I915_READ(GEN7_CXT_SIZE);
		if (IS_HASWELL(dev_priv))
			ret = HSW_CXT_TOTAL_SIZE;
		else
			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
		break;
	case 8:
		ret = GEN8_CXT_TOTAL_SIZE;
		break;
	default:
		BUG();
	}

	return ret;
}
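
/*
 * Final release callback for a context's kref, called with struct_mutex held
 * once the last reference is dropped: release the ppgtt, any per-engine ring
 * and state objects, unlink the context from the device context list and
 * return the hardware context id to the ida before freeing the struct.
 */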
void i915_gem_context_free(struct kref *ctx_ref)
{
	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
	int i;

	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
	trace_i915_context_free(ctx);
	GEM_BUG_ON(!ctx->closed);

	i915_ppgtt_put(ctx->ppgtt);

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
		if (ce->ring)
			intel_ring_free(ce->ring);

		i915_gem_object_put(ce->state);
	}

	list_del(&ctx->link);

	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
	kfree(ctx);
}

struct drm_i915_gem_object *
i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	lockdep_assert_held(&dev->struct_mutex);

	obj = i915_gem_object_create(dev, size);
	if (IS_ERR(obj))
		return obj;

	/*
	 * Try to make the context utilize L3 as well as LLC.
	 *
	 * On VLV we don't have L3 controls in the PTEs so we
	 * shouldn't touch the cache level, especially as that
	 * would make the object snooped which might have a
	 * negative performance impact.
	 *
	 * Snooping is required on non-llc platforms in execlist
	 * mode, but since all GGTT accesses use PAT entry 0 we
	 * get snooping anyway regardless of cache_level.
	 *
	 * This is only applicable for Ivy Bridge devices since
	 * later platforms don't have L3 control bits in the PTE.
	 */
	if (IS_IVYBRIDGE(dev)) {
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
		/* Failure shouldn't ever happen this early */
		if (WARN_ON(ret)) {
			i915_gem_object_put(obj);
			return ERR_PTR(ret);
		}
	}

	return obj;
}

static void i915_ppgtt_close(struct i915_address_space *vm)
{
	struct list_head *phases[] = {
		&vm->active_list,
		&vm->inactive_list,
		&vm->unbound_list,
		NULL,
	}, **phase;

	GEM_BUG_ON(vm->closed);
	vm->closed = true;

	for (phase = phases; *phase; phase++) {
		struct i915_vma *vma, *vn;

		list_for_each_entry_safe(vma, vn, *phase, vm_link)
			if (!i915_vma_is_closed(vma))
				i915_vma_close(vma);
	}
}

static void context_close(struct i915_gem_context *ctx)
{
	GEM_BUG_ON(ctx->closed);
	ctx->closed = true;
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);
	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}
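
/*
 * Allocate a hardware context id from the ida. If the id space is exhausted,
 * retire outstanding requests in the hope of releasing some stale contexts,
 * then retry once before giving up.
 */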
static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

	ret = ida_simple_get(&dev_priv->context_hw_ida,
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
		i915_gem_retire_requests(dev_priv);
		ret = ida_simple_get(&dev_priv->context_hw_ida,
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

static struct i915_gem_context *
__create_hw_context(struct drm_device *dev,
		    struct drm_i915_file_private *file_priv)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);

	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

	kref_init(&ctx->ref);
	list_add_tail(&ctx->link, &dev_priv->context_list);
	ctx->i915 = dev_priv;

	ctx->ggtt_alignment = get_context_alignment(dev_priv);

	if (dev_priv->hw_context_size) {
		struct drm_i915_gem_object *obj =
			i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto err_out;
		}
		ctx->engine[RCS].state = obj;
	}

	/* Default context will never have a file_priv */
	if (file_priv != NULL) {
		ret = idr_alloc(&file_priv->context_idr, ctx,
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
		if (ret < 0)
			goto err_out;
	} else
		ret = DEFAULT_CONTEXT_HANDLE;

	ctx->file_priv = file_priv;
	ctx->user_handle = ret;
	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP. */
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);

	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
	ctx->ring_size = 4 * PAGE_SIZE;
	ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
			     GEN8_CTX_ADDRESSING_MODE_SHIFT;
	ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);

	return ctx;

err_out:
	context_close(ctx);
	return ERR_PTR(ret);
}

/**
 * The default context needs to exist per ring that uses contexts. It stores
 * the context state of the GPU for applications that don't utilize HW
 * contexts, as well as for the idle case.
 */
static struct i915_gem_context *
i915_gem_create_context(struct drm_device *dev,
			struct drm_i915_file_private *file_priv)
{
	struct i915_gem_context *ctx;

	lockdep_assert_held(&dev->struct_mutex);

	ctx = __create_hw_context(dev, file_priv);
	if (IS_ERR(ctx))
		return ctx;

	if (USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt =
			i915_ppgtt_create(to_i915(dev), file_priv);

		if (IS_ERR(ppgtt)) {
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
			idr_remove(&file_priv->context_idr, ctx->user_handle);
			context_close(ctx);
			return ERR_CAST(ppgtt);
		}

		ctx->ppgtt = ppgtt;
	}

	trace_i915_context_create(ctx);

	return ctx;
}

/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device *
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx))
		goto out;

	ctx->execlists_force_single_submission = true;
	ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

static void i915_gem_context_unpin(struct i915_gem_context *ctx,
				   struct intel_engine_cs *engine)
{
	if (i915.enable_execlists) {
		intel_lr_context_unpin(ctx, engine);
	} else {
		struct intel_context *ce = &ctx->engine[engine->id];

		if (ce->state)
			i915_gem_object_ggtt_unpin(ce->state);

		i915_gem_context_put(ctx);
	}
}

void i915_gem_context_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);

	lockdep_assert_held(&dev->struct_mutex);

	if (i915.enable_execlists) {
		struct i915_gem_context *ctx;

		list_for_each_entry(ctx, &dev_priv->context_list, link)
			intel_lr_context_reset(dev_priv, ctx);
	}

	i915_gem_context_lost(dev_priv);
}

int i915_gem_context_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *ctx;

	/* Init should only be called once per module load. Eventually the
	 * restriction on the context_disabled check can be loosened. */
	if (WARN_ON(dev_priv->kernel_context))
		return 0;

	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		if (!i915.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
	ida_init(&dev_priv->context_hw_ida);

	if (i915.enable_execlists) {
		/* NB: intentionally left blank. We will allocate our own
		 * backing objects as we need them, thank you very much */
		dev_priv->hw_context_size = 0;
	} else if (HAS_HW_CONTEXTS(dev_priv)) {
		dev_priv->hw_context_size =
			round_up(get_context_size(dev_priv), 4096);
		if (dev_priv->hw_context_size > (1<<20)) {
			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
					 dev_priv->hw_context_size);
			dev_priv->hw_context_size = 0;
		}
	}

	ctx = i915_gem_create_context(dev, NULL);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context (error %ld)\n",
			  PTR_ERR(ctx));
		return PTR_ERR(ctx);
	}

	dev_priv->kernel_context = ctx;

	DRM_DEBUG_DRIVER("%s context support initialized\n",
			 i915.enable_execlists ? "LR" :
			 dev_priv->hw_context_size ? "HW" : "fake");
	return 0;
}
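
/*
 * Called when the GPU's context state has been lost (e.g. across a reset):
 * drop the pinned last_context on every engine and, on the legacy ringbuffer
 * path, mark the default contexts as uninitialised and in need of an L3
 * remap so that their full state is re-emitted on next use.
 */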
void i915_gem_context_lost(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	for_each_engine(engine, dev_priv) {
		if (engine->last_context) {
			i915_gem_context_unpin(engine->last_context, engine);
			engine->last_context = NULL;
		}
	}

	/* Force the GPU state to be restored on enabling */
	if (!i915.enable_execlists) {
		struct i915_gem_context *ctx;

		list_for_each_entry(ctx, &dev_priv->context_list, link) {
			if (!i915_gem_context_is_default(ctx))
				continue;

			for_each_engine(engine, dev_priv)
				ctx->engine[engine->id].initialised = false;

			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
		}

		for_each_engine(engine, dev_priv) {
			struct intel_context *kce =
				&dev_priv->kernel_context->engine[engine->id];

			kce->initialised = true;
		}
	}
}

void i915_gem_context_fini(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_gem_context *dctx = dev_priv->kernel_context;

	lockdep_assert_held(&dev->struct_mutex);

	context_close(dctx);
	dev_priv->kernel_context = NULL;

	ida_destroy(&dev_priv->context_hw_ida);
}

static int context_idr_cleanup(int id, void *p, void *data)
{
	struct i915_gem_context *ctx = p;

	context_close(ctx);
	return 0;
}

int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;

	idr_init(&file_priv->context_idr);

	mutex_lock(&dev->struct_mutex);
	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);

	if (IS_ERR(ctx)) {
		idr_destroy(&file_priv->context_idr);
		return PTR_ERR(ctx);
	}

	return 0;
}

void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	lockdep_assert_held(&dev->struct_mutex);

	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
	idr_destroy(&file_priv->context_idr);
}
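
/*
 * Emit the command sequence for a legacy (ringbuffer) context switch. The
 * switch itself is MI_SET_CONTEXT with the context object's ggtt offset and
 * flags, followed by MI_NOOP (WaMiSetContext_Hang). On gen7+ the sequence is
 * bracketed by MI_ARB_ON_OFF (WaProgramMiArbOnOffAroundMiSetContext) and,
 * when semaphore signalling to other engines is in use, by LRIs toggling
 * GEN6_PSMI_SLEEP_MSG_DISABLE on those engines plus an SRM used purely to
 * add a delay before the next switch.
 */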
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
	struct drm_i915_private *dev_priv = req->i915;
	struct intel_ring *ring = req->ring;
	struct intel_engine_cs *engine = req->engine;
	u32 flags = hw_flags | MI_MM_SPACE_GTT;
	const int num_rings =
		/* Use an extended w/a on ivb+ if signalling from other rings */
		i915.semaphores ?
		hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
		0;
	int len, ret;

	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
	 * explicitly, so we rely on the value at ring init, stored in
	 * itlb_before_ctx_switch.
	 */
	if (IS_GEN6(dev_priv)) {
		ret = engine->emit_flush(req, EMIT_INVALIDATE);
		if (ret)
			return ret;
	}

	/* These flags are for resource streamer on HSW+ */
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
	else if (INTEL_GEN(dev_priv) < 8)
		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);

	len = 4;
	if (INTEL_GEN(dev_priv) >= 7)
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);

	ret = intel_ring_begin(req, len);
	if (ret)
		return ret;

	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
	if (INTEL_GEN(dev_priv) >= 7) {
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
		if (num_rings) {
			struct intel_engine_cs *signaller;

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv) {
				if (signaller == engine)
					continue;

				intel_ring_emit_reg(ring,
						    RING_PSMI_CTL(signaller->mmio_base));
				intel_ring_emit(ring,
						_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}
		}
	}

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_SET_CONTEXT);
	intel_ring_emit(ring,
			i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
			flags);
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
	intel_ring_emit(ring, MI_NOOP);

	if (INTEL_GEN(dev_priv) >= 7) {
		if (num_rings) {
			struct intel_engine_cs *signaller;
			i915_reg_t last_reg = {}; /* keep gcc quiet */

			intel_ring_emit(ring,
					MI_LOAD_REGISTER_IMM(num_rings));
			for_each_engine(signaller, dev_priv) {
				if (signaller == engine)
					continue;

				last_reg = RING_PSMI_CTL(signaller->mmio_base);
				intel_ring_emit_reg(ring, last_reg);
				intel_ring_emit(ring,
						_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
			}

			/* Insert a delay before the next switch! */
			intel_ring_emit(ring,
					MI_STORE_REGISTER_MEM |
					MI_SRM_LRM_GLOBAL_GTT);
			intel_ring_emit_reg(ring, last_reg);
			intel_ring_emit(ring, engine->scratch.gtt_offset);
			intel_ring_emit(ring, MI_NOOP);
		}
		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
	}

	intel_ring_advance(ring);

	return ret;
}
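
/*
 * Re-emit the saved L3 remapping registers for one slice as a single
 * MI_LOAD_REGISTER_IMM, re-applying whatever remap info the driver has
 * stored after a context switch. Returns 0 immediately if there is no
 * remap info for the slice.
 */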
static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
	u32 *remap_info = req->i915->l3_parity.remap_info[slice];
	struct intel_ring *ring = req->ring;
	int i, ret;

	if (!remap_info)
		return 0;

	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (ret)
		return ret;

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
		intel_ring_emit(ring, remap_info[i]);
	}
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
				   struct i915_gem_context *to)
{
	if (to->remap_slice)
		return false;

	if (!to->engine[RCS].initialised)
		return false;

	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	return to == engine->last_context;
}

static bool
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
		  struct intel_engine_cs *engine,
		  struct i915_gem_context *to)
{
	if (!ppgtt)
		return false;

	/* Always load the ppgtt on first use */
	if (!engine->last_context)
		return true;

	/* Same context without new entries, skip */
	if (engine->last_context == to &&
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
		return false;

	if (engine->id != RCS)
		return true;

	if (INTEL_GEN(engine->i915) < 8)
		return true;

	return false;
}

static bool
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
		   struct i915_gem_context *to,
		   u32 hw_flags)
{
	if (!ppgtt)
		return false;

	if (!IS_GEN8(to->i915))
		return false;

	if (hw_flags & MI_RESTORE_INHIBIT)
		return true;

	return false;
}
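
/*
 * Perform a full context switch on the render engine: pin the target
 * context's state object in the GGTT, move it to the GTT domain, load the
 * page directory beforehand if needed, emit MI_SET_CONTEXT, hand the
 * previous context's state object over to the active tracker, optionally
 * reload the page directory afterwards (gen8 with an inhibited restore),
 * re-emit any pending L3 remaps and finally run the engine's init_context
 * hook on first use of the context.
 */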
static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	struct i915_gem_context *to = req->ctx;
	struct intel_engine_cs *engine = req->engine;
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
	struct i915_gem_context *from;
	u32 hw_flags;
	int ret, i;

	if (skip_rcs_switch(ppgtt, engine, to))
		return 0;

	/* Trying to pin first makes error handling easier. */
	ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
				       to->ggtt_alignment, 0);
	if (ret)
		return ret;

	/*
	 * Pin can switch back to the default context if we end up calling into
	 * evict_everything - as a last ditch gtt defrag effort that also
	 * switches to the default context. Hence we need to reload from here.
	 *
	 * XXX: Doing so is painfully broken!
	 */
	from = engine->last_context;

	/*
	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
	 * that thanks to write = false in this call and us not setting any gpu
	 * write domains when putting a context object onto the active list
	 * (when switching away from it), this won't block.
	 *
	 * XXX: We need a real interface to do this instead of trickery.
	 */
	ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
	if (ret)
		goto unpin_out;

	if (needs_pd_load_pre(ppgtt, engine, to)) {
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context." */
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		if (ret)
			goto unpin_out;
	}

	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occurs in this case. */
		hw_flags = MI_RESTORE_INHIBIT;
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;

	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			goto unpin_out;
	}

	/* The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. In fact, the below code
	 * is a bit suboptimal because the retiring can occur simply after the
	 * MI_SET_CONTEXT instead of when the next seqno has completed.
	 */
	if (from != NULL) {
		struct drm_i915_gem_object *obj = from->engine[RCS].state;

		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
		 * whole damn pipeline, we don't need to explicitly mark the
		 * object dirty. The only exception is that the context must be
		 * correct in case the object gets swapped out. Ideally we'd be
		 * able to defer doing this until we know the object would be
		 * swapped, but there is no way to do that yet.
		 */
		obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);

		/* obj is kept alive until the next request by its active ref */
		i915_gem_object_ggtt_unpin(obj);
		i915_gem_context_put(from);
	}
	engine->last_context = i915_gem_context_get(to);

	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
		trace_switch_mm(engine, to);
		ret = ppgtt->switch_mm(ppgtt, req);
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

		ret = remap_l3(req, i);
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

	if (!to->engine[RCS].initialised) {
		if (engine->init_context) {
			ret = engine->init_context(req);
			if (ret)
				return ret;
		}
		to->engine[RCS].initialised = true;
	}

	return 0;

unpin_out:
	i915_gem_object_ggtt_unpin(to->engine[RCS].state);
	return ret;
}

/**
 * i915_switch_context() - perform a GPU context switch.
 * @req: request for which we'll execute the context switch
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 on create and destroy. If the context is in use by the GPU,
 * it will have a refcount > 1. This allows us to destroy the context abstract
 * object while letting the normal object tracking destroy the backing BO.
 *
 * This function should not be used in execlists mode. Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
 */
int i915_switch_context(struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;

	lockdep_assert_held(&req->i915->drm.struct_mutex);
	if (i915.enable_execlists)
		return 0;

	if (!req->ctx->engine[engine->id].state) {
		struct i915_gem_context *to = req->ctx;
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;

		if (needs_pd_load_pre(ppgtt, engine, to)) {
			int ret;

			trace_switch_mm(engine, to);
			ret = ppgtt->switch_mm(ppgtt, req);
			if (ret)
				return ret;

			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
		}

		if (to != engine->last_context) {
			if (engine->last_context)
				i915_gem_context_put(engine->last_context);
			engine->last_context = i915_gem_context_get(to);
		}

		return 0;
	}

	return do_rcs_switch(req);
}
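
/*
 * Queue a request on every engine whose last context is not the kernel
 * context, switching it back to the kernel context. This lets the driver
 * drop its references to user contexts, e.g. before idling or suspending
 * the GPU.
 */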
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;

	for_each_engine(engine, dev_priv) {
		struct drm_i915_gem_request *req;
		int ret;

		if (engine->last_context == NULL)
			continue;

		if (engine->last_context == dev_priv->kernel_context)
			continue;

		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

		ret = i915_switch_context(req);
		i915_add_request_no_flush(req);
		if (ret)
			return ret;
	}

	return 0;
}

static bool contexts_enabled(struct drm_device *dev)
{
	return i915.enable_execlists || to_i915(dev)->hw_context_size;
}
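
/*
 * The ioctls below are the userspace entry points for context management.
 * Roughly, a client creates and destroys a context like this (illustrative
 * userspace sketch using libdrm, not part of the driver):
 *
 *	struct drm_i915_gem_context_create create = { 0 };
 *	struct drm_i915_gem_context_destroy destroy = { 0 };
 *
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
 *	// ... submit execbuffer2 calls with rsvd1 = create.ctx_id ...
 *	destroy.ctx_id = create.ctx_id;
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
 */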
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (!contexts_enabled(dev))
		return -ENODEV;

	if (args->pad != 0)
		return -EINVAL;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_create_context(dev, file_priv);
	mutex_unlock(&dev->struct_mutex);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	args->ctx_id = ctx->user_handle;
	DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id);

	return 0;
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_gem_context *ctx;
	int ret;

	if (args->pad != 0)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
		return -ENOENT;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
	mutex_unlock(&dev->struct_mutex);

	DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
	return 0;
}

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		args->value = ctx->hang_stats.ban_period_seconds;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
			args->value = to_i915(dev)->ggtt.base.total;
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
		if (args->size)
			ret = -EINVAL;
		else if (args->value < ctx->hang_stats.ban_period_seconds &&
			 !capable(CAP_SYS_ADMIN))
			ret = -EPERM;
		else
			ctx->hang_stats.ban_period_seconds = args->value;
		break;
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
		if (args->size) {
			ret = -EINVAL;
		} else {
			if (args->value)
				ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
			else
				ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
		}
		break;
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_reset_stats *args = data;
	struct i915_ctx_hang_stats *hs;
	struct i915_gem_context *ctx;
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		return PTR_ERR(ctx);
	}
	hs = &ctx->hang_stats;

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

	args->batch_active = hs->batch_active;
	args->batch_pending = hs->batch_pending;

	mutex_unlock(&dev->struct_mutex);

	return 0;
}