/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64

static inline int __ring_space(int head, int tail, int size)
{
	int space = head - (tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += size;
	return space;
}

static inline int ring_space(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
}
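
/*
 * Illustrative numbers for the wrap-around arithmetic above (assuming the
 * 32-page ring set up later in this file, i.e. size = 0x20000 with 4 KiB
 * pages, and ignoring the I915_RING_FREE_SPACE reserve): head = 0x100 and
 * tail = 0x1f00 gives space = 0x100 - 0x1f00 = -0x1e00, which wraps to
 * -0x1e00 + 0x20000 = 0x1e200 bytes of usable space.  The reserve is simply
 * subtracted up front so the tail never quite catches up with the head.
 */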

static bool intel_ring_stopped(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}

void __intel_ring_advance(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	ringbuf->tail &= ringbuf->size - 1;
	if (intel_ring_stopped(ring))
		return;
	ring->write_tail(ring, ringbuf->tail);
}

static int
gen2_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
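
/*
 * A note on the dword accounting above: the argument to GFX_OP_PIPE_CONTROL()
 * is the total packet length in dwords, matching the number of real dwords
 * emitted here (header, flags, address and a quadword of write data make
 * five).  intel_ring_begin(ring, 6) reserves one dword more than that, and
 * the trailing MI_NOOP pads the emit count back up to the reservation so the
 * tail still advances by an even number of dwords.
 */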

static int
gen6_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
{
	int ret;

	if (!ring->fbc_dirty)
		return 0;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;
	/* WaFbcNukeOn3DBlt:ivb/hsw */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, value);
	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
	intel_ring_advance(ring);

	ring->fbc_dirty = false;
	return 0;
}

static int
gen7_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	if (!invalidate_domains && flush_domains)
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

	return 0;
}

static int
gen8_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	flags |= PIPE_CONTROL_CS_STALL;

	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_engine_cs *ring,
			    u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u64 acthd;

	if (INTEL_INFO(ring->dev)->gen >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
					 RING_ACTHD_UDW(ring->mmio_base));
	else if (INTEL_INFO(ring->dev)->gen >= 4)
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}

static bool stop_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);

	if (!IS_GEN2(ring->dev)) {
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
		if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
			DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
			return false;
		}
	}

	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	if (!IS_GEN2(ring->dev)) {
		(void)I915_READ_CTL(ring);
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
}

static int init_ring_common(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_object *obj = ringbuf->obj;
	int ret = 0;

	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(ring)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		if (!stop_ring(ring)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
			ret = -EIO;
			goto out;
		}
	}

	if (I915_NEED_GFX_HWS(dev))
		intel_ring_setup_status_page(ring);
	else
		ring_setup_phys_status_page(ring);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(ring);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
	I915_WRITE_CTL(ring,
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
			  ring->name,
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ringbuf->space = ring_space(ring);
		ringbuf->last_retired_head = -1;
	}

	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));

out:
	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

static int
init_pipe_control(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->scratch.obj)
		return 0;

	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
	if (ring->scratch.obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
	if (ret)
		goto err_unref;

	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
	ring->scratch.cpu_page = kmap(ring->scratch.obj->pages[0]);
	if (ring->scratch.cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 ring->name, ring->scratch.gtt_offset);
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
err_unref:
	drm_gem_object_unreference(&ring->scratch.obj->base);
err:
	return ret;
}
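
/*
 * The 4 KiB scratch page set up above is what the PIPE_CONTROL paths earlier
 * in this file point their post-sync writes at (always at
 * scratch.gtt_offset + 2 * CACHELINE_BYTES or beyond), and it is also where
 * pc_render_add_request()/pc_render_get_seqno() below keep the gen5 seqno, in
 * the first dword of scratch.cpu_page.  Keeping the dummy writes a couple of
 * cachelines in means they should never land on top of that seqno.
 */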

static int init_render_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_DPF(dev))
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));

	return ret;
}

static void render_ring_cleanup(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (ring->scratch.obj == NULL)
		return;

	if (INTEL_INFO(dev)->gen >= 5) {
		kunmap(ring->scratch.obj->pages[0]);
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
	}

	drm_gem_object_unreference(&ring->scratch.obj->base);
	ring->scratch.obj = NULL;
}

static int gen6_signal(struct intel_engine_cs *signaller,
		       unsigned int num_dwords)
{
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *useless;
	int i, ret;

	/* NB: In order to be able to do semaphore MBOX updates for varying
	 * number of rings, it's easiest if we round up each individual update
	 * to a multiple of 2 (since ring updates must always be a multiple of
	 * 2) even though the actual update only requires 3 dwords.
	 */
#define MBOX_UPDATE_DWORDS 4
	if (i915_semaphore_is_enabled(dev))
		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
	else
		return intel_ring_begin(signaller, num_dwords);

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;
#undef MBOX_UPDATE_DWORDS

	for_each_ring(useless, dev_priv, i) {
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
		if (mbox_reg != GEN6_NOSYNC) {
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
			intel_ring_emit(signaller, mbox_reg);
			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
			intel_ring_emit(signaller, MI_NOOP);
		} else {
			intel_ring_emit(signaller, MI_NOOP);
			intel_ring_emit(signaller, MI_NOOP);
			intel_ring_emit(signaller, MI_NOOP);
			intel_ring_emit(signaller, MI_NOOP);
		}
	}

	return 0;
}
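
/*
 * To make the bookkeeping above concrete: a caller such as gen6_add_request()
 * below passes in the 4 dwords it needs for itself.  With semaphores enabled,
 * each of the other (I915_NUM_RINGS - 1) engines then costs
 * MBOX_UPDATE_DWORDS = 4 more dwords -- either a 3-dword
 * MI_LOAD_REGISTER_IMM update padded with one MI_NOOP, or 4 plain MI_NOOPs
 * for an engine with no mailbox -- so with the five engines referenced in
 * this file the reservation grows from 4 to 4 + 4 * 4 = 20 dwords, exactly
 * matching what the loop emits.
 */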

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_engine_cs *ring)
{
	int ret;

	ret = ring->semaphore.signal(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
					      u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return dev_priv->last_seqno < seqno;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
gen6_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
	int ret;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	/* If seqno wrap happened, omit the wait with no-ops */
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
		intel_ring_emit(waiter, dw1 | wait_mbox);
		intel_ring_emit(waiter, seqno);
		intel_ring_emit(waiter, 0);
		intel_ring_emit(waiter, MI_NOOP);
	} else {
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
	}
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) |		\
			PIPE_CONTROL_QW_WRITE |				\
			PIPE_CONTROL_DEPTH_STALL);			\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	__intel_ring_advance(ring);

	return 0;
}
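
/*
 * Note the scratch page layout implied above: the seqno itself is written
 * (and later read by pc_render_get_seqno()) at offset 0, while the six
 * workaround flushes walk through the page starting at offset
 * 2 * CACHELINE_BYTES and stepping by the same amount, so every dummy qword
 * write lands on its own cacheline and none of them can overwrite the seqno.
 */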

static u32
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency) {
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
		POSTING_READ(RING_ACTHD(ring->mmio_base));
	}

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static void
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
}

static u32
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return ring->scratch.cpu_page[0];
}

static void
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	ring->scratch.cpu_page[0] = seqno;
}

static bool
gen5_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0)
		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen5_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0)
		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i9xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i8xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
		u32 reg = RING_INSTPM(ring->mmio_base);

		/* ring should be idle before issuing a sync flush */
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
			     1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  ring->name);
	}
}

static int
bsd_ring_flush(struct intel_engine_cs *ring,
	       u32 invalidate_domains,
	       u32 flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}
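
/*
 * The add_request() implementations above (gen6_add_request() and
 * i9xx_add_request()) share the same shape: MI_STORE_DWORD_INDEX writes the
 * new seqno into the I915_GEM_HWS_INDEX dword slot of the hardware status
 * page programmed via intel_ring_setup_status_page()/HWS_PGA, the
 * *_get_seqno() hooks read it back through intel_read_status_page(), and
 * MI_USER_INTERRUPT then raises the ring's user interrupt so waiters can be
 * woken.
 */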

static bool
gen6_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_PARITY_ERROR(dev)));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen6_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
		else
			I915_WRITE_IMR(ring, ~0);
		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
hsw_vebox_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		snb_enable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
hsw_vebox_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		I915_WRITE_IMR(ring, ~0);
		snb_disable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
gen8_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
		} else {
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen8_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
		} else {
			I915_WRITE_IMR(ring, ~0);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static int
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
static int
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 len,
			 unsigned flags)
{
	int ret;

	if (flags & I915_DISPATCH_PINNED) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);
	} else {
		u32 cs_offset = ring->scratch.gtt_offset;

		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 9+3);
		if (ret)
			return ret;
		/* Blit the batch (which has now all relocs applied) to the stable batch
		 * scratch bo area (so that the CS never stumbles over its tlb
		 * invalidation bug) ... */
		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
				XY_SRC_COPY_BLT_WRITE_ALPHA |
				XY_SRC_COPY_BLT_WRITE_RGB);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);
		intel_ring_emit(ring, MI_FLUSH);

		/* ... and execute it. */
		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
		intel_ring_emit(ring, cs_offset + len - 8);
		intel_ring_advance(ring);
	}

	return 0;
}
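
/*
 * The 9+3 reservation above mirrors what the non-pinned path emits: nine
 * dwords for the XY_SRC_COPY blit that copies the batch into the scratch bo
 * (including the trailing MI_FLUSH), plus three dwords for the
 * MI_BATCH_BUFFER packet that then executes the copy in place.
 */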

static int
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	kunmap(obj->pages[0]);
	i915_gem_object_ggtt_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	if ((obj = ring->status_page.obj) == NULL) {
		int ret;

		obj = i915_gem_alloc_object(ring->dev, 4096);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate status page\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
		if (ret)
			goto err_unref;

		ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
		if (ret) {
err_unref:
			drm_gem_object_unreference(&obj->base);
			return ret;
		}

		ring->status_page.obj = obj;
	}

	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
	ring->status_page.page_addr = kmap(obj->pages[0]);
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			 ring->name, ring->status_page.gfx_addr);

	return 0;
}

static int init_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

static int allocate_ring_buffer(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_object *obj;
	int ret;

	if (intel_ring_initialized(ring))
		return 0;

	obj = NULL;
	if (!HAS_LLC(dev))
		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
	if (obj == NULL)
		obj = i915_gem_alloc_object(dev, ringbuf->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ringbuf->virtual_start =
		ioremap_wc(dev->agp->base + i915_gem_obj_ggtt_offset(obj),
			   ringbuf->size);
	if (ringbuf->virtual_start == NULL) {
		ret = -EINVAL;
		goto err_unpin;
	}

	ringbuf->obj = obj;
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	return ret;
}

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (ringbuf == NULL) {
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
		if (!ringbuf)
			return -ENOMEM;
		ring->buffer = ringbuf;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ringbuf->size = 32 * PAGE_SIZE;
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			goto error;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_status_page(ring);
		if (ret)
			goto error;
	}

	ret = allocate_ring_buffer(ring);
	if (ret) {
		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
		goto error;
	}

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(dev) || IS_845G(dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;

	ret = i915_cmd_parser_init_ring(ring);
	if (ret)
		goto error;

	ret = ring->init(ring);
	if (ret)
		goto error;

	return 0;

error:
	kfree(ringbuf);
	ring->buffer = NULL;
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
	struct intel_ringbuffer *ringbuf = ring->buffer;

	if (!intel_ring_initialized(ring))
		return;

	intel_stop_ring_buffer(ring);
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);

	pmap_unmapdev((vm_offset_t)ringbuf->virtual_start, ringbuf->size);

	i915_gem_object_ggtt_unpin(ringbuf->obj);
	drm_gem_object_unreference(&ringbuf->obj->base);
	ringbuf->obj = NULL;
	ring->preallocated_lazy_request = NULL;
	ring->outstanding_lazy_seqno = 0;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);

	i915_cmd_parser_fini_ring(ring);

	kfree(ringbuf);
	ring->buffer = NULL;
}

static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	if (ringbuf->last_retired_head != -1) {
		ringbuf->head = ringbuf->last_retired_head;
		ringbuf->last_retired_head = -1;

		ringbuf->space = ring_space(ring);
		if (ringbuf->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
			seqno = request->seqno;
			break;
		}
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);
	ringbuf->head = ringbuf->last_retired_head;
	ringbuf->last_retired_head = -1;

	ringbuf->space = ring_space(ring);
	return 0;
}

static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* force the tail write in case we have been skipping them */
	__intel_ring_advance(ring);

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = jiffies + 60 * HZ;

	trace_i915_ring_wait_begin(ring);
	do {
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->space = ring_space(ring);
		if (ringbuf->space >= n) {
			ret = 0;
			break;
		}

#if 0
		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
		    dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		msleep(1);

#if 0
		if (dev_priv->mm.interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
#endif

		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
					   dev_priv->mm.interruptible);
		if (ret)
			break;

		if (time_after(jiffies, end)) {
			ret = -EBUSY;
			break;
		}
	} while (1);
	trace_i915_ring_wait_end(ring);
	return ret;
}

static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
{
	uint32_t __iomem *virt;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int rem = ringbuf->size - ringbuf->tail;

	if (ringbuf->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ringbuf->virtual_start + ringbuf->tail);
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ringbuf->tail = 0;
	ringbuf->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_engine_cs *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_seqno) {
		ret = i915_add_request(ring, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
{
	if (ring->outstanding_lazy_seqno)
		return 0;

	if (ring->preallocated_lazy_request == NULL) {
		struct drm_i915_gem_request *request;

		request = kmalloc(sizeof(*request), M_DRM, M_WAITOK);
		if (request == NULL)
			return -ENOMEM;

		ring->preallocated_lazy_request = request;
	}

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
}

static int __intel_ring_prepare(struct intel_engine_cs *ring,
				int bytes)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ringbuf->space < bytes)) {
		ret = ring_wait_for_space(ring, bytes);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}
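
/*
 * Note the ordering in __intel_ring_prepare(): the wrap is handled first, by
 * padding the remainder of the buffer with MI_NOOPs and resetting the tail to
 * zero, and only then do we wait for free space.  That way the dwords
 * reserved by intel_ring_begin() below are always contiguous and never
 * straddle the end of the ring.
 */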

int intel_ring_begin(struct intel_engine_cs *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
				   dev_priv->mm.interruptible);
	if (ret)
		return ret;

	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
	if (ret)
		return ret;

	/* Preallocate the olr before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;

	ring->buffer->space -= num_dwords * sizeof(uint32_t);
	return 0;
}

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct intel_engine_cs *ring)
{
	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
	int ret;

	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
	ret = intel_ring_begin(ring, num_dwords);
	if (ret)
		return ret;

	while (num_dwords--)
		intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	return 0;
}
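
/*
 * Worked example for the alignment above (with CACHELINE_BYTES = 64 and
 * 4-byte dwords, i.e. 16 dwords per cacheline): if the tail sits 36 bytes
 * into a cacheline, num_dwords starts out as 36 / 4 = 9, so 16 - 9 = 7
 * MI_NOOPs are emitted and the tail lands exactly on the next 64-byte
 * boundary.
 */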

void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	BUG_ON(ring->outstanding_lazy_seqno);

	if (INTEL_INFO(ring->dev)->gen >= 6) {
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
		if (HAS_VEBOX(ring->dev))
			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
	}

	ring->set_seqno(ring, seqno);
	ring->hangcheck.seqno = seqno;
}

static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
				     u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
			       u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);
	return 0;
}
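
/*
 * A note on the "cmd += 1" above (and in gen6_ring_flush() below): the low
 * bits of the MI_FLUSH_DW header encode the packet length, and on gen8 the
 * post-sync address grows to 64 bits, adding one dword to the packet.
 * Bumping the header by one accounts for that extra dword, which is why the
 * gen8 branch emits an upper address dword where the older path pads with an
 * MI_NOOP instead.
 */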

static int
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL &&
		!(flags & I915_DISPATCH_SECURE);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	/* FIXME(BDW): Address space and security selectors. */
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
	intel_ring_emit(ring, lower_32_bits(offset));
	intel_ring_emit(ring, upper_32_bits(offset));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			     u64 offset, u32 len,
			     unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int gen6_ring_flush(struct intel_engine_cs *ring,
			   u32 invalidate, u32 flush)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);

	if (IS_GEN7(dev) && !invalidate && flush)
		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);

	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[RCS];

        ring->name = "render ring";
        ring->id = RCS;
        ring->mmio_base = RENDER_RING_BASE;

        if (INTEL_INFO(dev)->gen >= 6) {
                ring->add_request = gen6_add_request;
                ring->flush = gen7_render_ring_flush;
                if (INTEL_INFO(dev)->gen == 6)
                        ring->flush = gen6_render_ring_flush;
                if (INTEL_INFO(dev)->gen >= 8) {
                        ring->flush = gen8_render_ring_flush;
                        ring->irq_get = gen8_ring_get_irq;
                        ring->irq_put = gen8_ring_put_irq;
                } else {
                        ring->irq_get = gen6_ring_get_irq;
                        ring->irq_put = gen6_ring_put_irq;
                }
                ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
                ring->get_seqno = gen6_ring_get_seqno;
                ring->set_seqno = ring_set_seqno;
                ring->semaphore.sync_to = gen6_ring_sync;
                ring->semaphore.signal = gen6_signal;
                /*
                 * These semaphores are only used on pre-gen8 platforms,
                 * and there is no VCS2 ring before gen8, so the semaphore
                 * between RCS and VCS2 is initialized as INVALID.
                 * Gen8 will set up the RCS/VCS2 semaphore later.
                 */
                ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
                ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
                ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
                ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
                ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
                ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
                ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
                ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
                ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
                ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
        } else if (IS_GEN5(dev)) {
                ring->add_request = pc_render_add_request;
                ring->flush = gen4_render_ring_flush;
                ring->get_seqno = pc_render_get_seqno;
                ring->set_seqno = pc_render_set_seqno;
                ring->irq_get = gen5_ring_get_irq;
                ring->irq_put = gen5_ring_put_irq;
                ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
                                        GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
        } else {
                ring->add_request = i9xx_add_request;
                if (INTEL_INFO(dev)->gen < 4)
                        ring->flush = gen2_render_ring_flush;
                else
                        ring->flush = gen4_render_ring_flush;
                ring->get_seqno = ring_get_seqno;
                ring->set_seqno = ring_set_seqno;
                if (IS_GEN2(dev)) {
                        ring->irq_get = i8xx_ring_get_irq;
                        ring->irq_put = i8xx_ring_put_irq;
                } else {
                        ring->irq_get = i9xx_ring_get_irq;
                        ring->irq_put = i9xx_ring_put_irq;
                }
                ring->irq_enable_mask = I915_USER_INTERRUPT;
        }
        ring->write_tail = ring_write_tail;
        if (IS_HASWELL(dev))
                ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
        else if (IS_GEN8(dev))
                ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
        else if (INTEL_INFO(dev)->gen >= 6)
                ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
        else if (INTEL_INFO(dev)->gen >= 4)
                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
        else if (IS_I830(dev) || IS_845G(dev))
                ring->dispatch_execbuffer = i830_dispatch_execbuffer;
        else
                ring->dispatch_execbuffer = i915_dispatch_execbuffer;
        ring->init = init_render_ring;
        ring->cleanup = render_ring_cleanup;
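
        /*
         * On platforms with HAS_BROKEN_CS_TLB (830/845) batches cannot be
         * executed safely from wherever userspace happened to bind them.
         * The scratch object pinned below (sized I830_BATCH_LIMIT) is
         * presumably the fixed GGTT location that i830_dispatch_execbuffer()
         * copies each batch into before running it, which is why an
         * allocation or pinning failure here is fatal for render ring init
         * on those platforms.
         */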

        /* Workaround batchbuffer to combat CS tlb bug. */
        if (HAS_BROKEN_CS_TLB(dev)) {
                struct drm_i915_gem_object *obj;
                int ret;

                obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
                if (obj == NULL) {
                        DRM_ERROR("Failed to allocate batch bo\n");
                        return -ENOMEM;
                }

                ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
                if (ret != 0) {
                        drm_gem_object_unreference(&obj->base);
                        DRM_ERROR("Failed to pin batch bo\n");
                        return ret;
                }

                ring->scratch.obj = obj;
                ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
        }

        return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[RCS];
        struct intel_ringbuffer *ringbuf = ring->buffer;
        int ret;

        if (ringbuf == NULL) {
                ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
                if (!ringbuf)
                        return -ENOMEM;
                ring->buffer = ringbuf;
        }

        ring->name = "render ring";
        ring->id = RCS;
        ring->mmio_base = RENDER_RING_BASE;

        if (INTEL_INFO(dev)->gen >= 6) {
                /* non-kms not supported on gen6+ */
                ret = -ENODEV;
                goto err_ringbuf;
        }

        /* Note: gem is not supported on gen5/ilk without kms (the corresponding
         * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
         * the special gen5 functions. */
        ring->add_request = i9xx_add_request;
        if (INTEL_INFO(dev)->gen < 4)
                ring->flush = gen2_render_ring_flush;
        else
                ring->flush = gen4_render_ring_flush;
        ring->get_seqno = ring_get_seqno;
        ring->set_seqno = ring_set_seqno;
        if (IS_GEN2(dev)) {
                ring->irq_get = i8xx_ring_get_irq;
                ring->irq_put = i8xx_ring_put_irq;
        } else {
                ring->irq_get = i9xx_ring_get_irq;
                ring->irq_put = i9xx_ring_put_irq;
        }
        ring->irq_enable_mask = I915_USER_INTERRUPT;
        ring->write_tail = ring_write_tail;
        if (INTEL_INFO(dev)->gen >= 4)
                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
        else if (IS_I830(dev) || IS_845G(dev))
                ring->dispatch_execbuffer = i830_dispatch_execbuffer;
        else
                ring->dispatch_execbuffer = i915_dispatch_execbuffer;
        ring->init = init_render_ring;
        ring->cleanup = render_ring_cleanup;

        ring->dev = dev;
        INIT_LIST_HEAD(&ring->active_list);
        INIT_LIST_HEAD(&ring->request_list);

        ringbuf->size = size;
        ringbuf->effective_size = ringbuf->size;
        if (IS_I830(ring->dev) || IS_845G(ring->dev))
                ringbuf->effective_size -= 2 * CACHELINE_BYTES;

        ringbuf->virtual_start = ioremap_wc(start, size);
        if (ringbuf->virtual_start == NULL) {
                DRM_ERROR("cannot ioremap virtual address for"
                          " ring buffer\n");
                ret = -ENOMEM;
                goto err_ringbuf;
        }

        if (!I915_NEED_GFX_HWS(dev)) {
                ret = init_phys_status_page(ring);
                if (ret)
                        goto err_vstart;
        }

        return 0;

err_vstart:
        pmap_unmapdev((vm_offset_t)ring->buffer->virtual_start, size);
err_ringbuf:
        kfree(ringbuf);
        ring->buffer = NULL;
        return ret;
}
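
/*
 * intel_render_ring_init_dri() is the legacy (non-KMS) bring-up path: the
 * ring memory is handed in as a raw physical range by the old DRI init
 * ioctl, so it is mapped with ioremap_wc() here rather than allocated as a
 * GEM object, and on 830/845 the effective size is shrunk by two cachelines,
 * apparently to keep the tail from running right up against the wrap point
 * on those chips. Platforms without a GTT-based hardware status page
 * (!I915_NEED_GFX_HWS) additionally get a physically addressed status page
 * via init_phys_status_page().
 */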

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[VCS];

        ring->name = "bsd ring";
        ring->id = VCS;

        ring->write_tail = ring_write_tail;
        if (INTEL_INFO(dev)->gen >= 6) {
                ring->mmio_base = GEN6_BSD_RING_BASE;
                /* gen6 bsd needs a special wa for tail updates */
                if (IS_GEN6(dev))
                        ring->write_tail = gen6_bsd_ring_write_tail;
                ring->flush = gen6_bsd_ring_flush;
                ring->add_request = gen6_add_request;
                ring->get_seqno = gen6_ring_get_seqno;
                ring->set_seqno = ring_set_seqno;
                if (INTEL_INFO(dev)->gen >= 8) {
                        ring->irq_enable_mask =
                                GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
                        ring->irq_get = gen8_ring_get_irq;
                        ring->irq_put = gen8_ring_put_irq;
                        ring->dispatch_execbuffer =
                                gen8_ring_dispatch_execbuffer;
                } else {
                        ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
                        ring->irq_get = gen6_ring_get_irq;
                        ring->irq_put = gen6_ring_put_irq;
                        ring->dispatch_execbuffer =
                                gen6_ring_dispatch_execbuffer;
                }
                ring->semaphore.sync_to = gen6_ring_sync;
                ring->semaphore.signal = gen6_signal;
                /*
                 * These semaphores are only used on pre-gen8 platforms,
                 * and there is no VCS2 ring before gen8, so the semaphore
                 * between VCS and VCS2 is initialized as INVALID.
                 * Gen8 will set up the VCS/VCS2 semaphore later.
                 */
                ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
                ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
                ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
                ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
                ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
                ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
                ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
                ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
                ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
                ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
        } else {
                ring->mmio_base = BSD_RING_BASE;
                ring->flush = bsd_ring_flush;
                ring->add_request = i9xx_add_request;
                ring->get_seqno = ring_get_seqno;
                ring->set_seqno = ring_set_seqno;
                if (IS_GEN5(dev)) {
                        ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
                        ring->irq_get = gen5_ring_get_irq;
                        ring->irq_put = gen5_ring_put_irq;
                } else {
                        ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
                        ring->irq_get = i9xx_ring_get_irq;
                        ring->irq_put = i9xx_ring_put_irq;
                }
                ring->dispatch_execbuffer = i965_dispatch_execbuffer;
        }
        ring->init = init_ring_common;

        return intel_init_ring_buffer(dev, ring);
}
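
/*
 * A note on the semaphore mailbox tables used by these ring init functions:
 * wait[i] holds the MI_SEMAPHORE_SYNC_* encoding this engine uses when it
 * has to wait on engine i, and signal[i] names the mailbox register this
 * engine writes so that engine i can observe its seqno. The two-letter
 * suffixes appear to follow a waiter/signaller naming scheme (e.g.
 * MI_SEMAPHORE_SYNC_VR for "VCS waits on RCS", GEN6_RVSYNC for the mailbox
 * the render ring checks when waiting on the video ring), and an engine
 * never waits on or signals itself, hence the INVALID/NOSYNC diagonal.
 */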

/**
 * Initialize the second BSD ring (Broadwell GT3 only).
 */
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[VCS2];

        if (INTEL_INFO(dev)->gen != 8) {
                DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
                return -EINVAL;
        }

        ring->name = "bsd2 ring";
        ring->id = VCS2;

        ring->write_tail = ring_write_tail;
        ring->mmio_base = GEN8_BSD2_RING_BASE;
        ring->flush = gen6_bsd_ring_flush;
        ring->add_request = gen6_add_request;
        ring->get_seqno = gen6_ring_get_seqno;
        ring->set_seqno = ring_set_seqno;
        ring->irq_enable_mask =
                        GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
        ring->irq_get = gen8_ring_get_irq;
        ring->irq_put = gen8_ring_put_irq;
        ring->dispatch_execbuffer =
                        gen8_ring_dispatch_execbuffer;
        ring->semaphore.sync_to = gen6_ring_sync;
        ring->semaphore.signal = gen6_signal;
        /*
         * Semaphores are only used on pre-gen8 platforms, and there is no
         * second BSD ring before gen8, so every semaphore register between
         * VCS2 and the other rings is initialized as invalid for now.
         * Gen8 will set up the VCS2 semaphores later.
         */
        ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;

        ring->init = init_ring_common;

        return intel_init_ring_buffer(dev, ring);
}
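
/*
 * VCS2 deliberately reuses the gen6 BSD flush/request helpers: from the
 * command-streamer point of view it is just another video ring at a
 * different mmio base (GEN8_BSD2_RING_BASE) with its own IRQ shift. Whether
 * the ring is actually present on a given Broadwell part is expected to be
 * decided by the caller (GT3 SKUs only); this function merely refuses to
 * run on anything that is not gen8.
 */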

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[BCS];

        ring->name = "blitter ring";
        ring->id = BCS;

        ring->mmio_base = BLT_RING_BASE;
        ring->write_tail = ring_write_tail;
        ring->flush = gen6_ring_flush;
        ring->add_request = gen6_add_request;
        ring->get_seqno = gen6_ring_get_seqno;
        ring->set_seqno = ring_set_seqno;
        if (INTEL_INFO(dev)->gen >= 8) {
                ring->irq_enable_mask =
                        GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
                ring->irq_get = gen8_ring_get_irq;
                ring->irq_put = gen8_ring_put_irq;
                ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
        } else {
                ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
                ring->irq_get = gen6_ring_get_irq;
                ring->irq_put = gen6_ring_put_irq;
                ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
        }
        ring->semaphore.sync_to = gen6_ring_sync;
        ring->semaphore.signal = gen6_signal;
        /*
         * These semaphores are only used on pre-gen8 platforms, and there
         * is no VCS2 ring before gen8, so the semaphore between BCS and
         * VCS2 is initialized as INVALID.
         * Gen8 will set up the BCS/VCS2 semaphore later.
         */
        ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
        ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
        ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
        ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
        ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
        ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
        ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
        ring->init = init_ring_common;

        return intel_init_ring_buffer(dev, ring);
}

int intel_init_vebox_ring_buffer(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring = &dev_priv->ring[VECS];

        ring->name = "video enhancement ring";
        ring->id = VECS;

        ring->mmio_base = VEBOX_RING_BASE;
        ring->write_tail = ring_write_tail;
        ring->flush = gen6_ring_flush;
        ring->add_request = gen6_add_request;
        ring->get_seqno = gen6_ring_get_seqno;
        ring->set_seqno = ring_set_seqno;

        if (INTEL_INFO(dev)->gen >= 8) {
                ring->irq_enable_mask =
                        GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
                ring->irq_get = gen8_ring_get_irq;
                ring->irq_put = gen8_ring_put_irq;
                ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
        } else {
                ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
                ring->irq_get = hsw_vebox_get_irq;
                ring->irq_put = hsw_vebox_put_irq;
                ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
        }
        ring->semaphore.sync_to = gen6_ring_sync;
        ring->semaphore.signal = gen6_signal;
        ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
        ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
        ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
        ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
        ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
        ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
        ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
        ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
        ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
        ring->init = init_ring_common;

        return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
{
        int ret;

        if (!ring->gpu_caches_dirty)
                return 0;

        ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
        if (ret)
                return ret;

        trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);

        ring->gpu_caches_dirty = false;
        return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
{
        uint32_t flush_domains;
        int ret;

        flush_domains = 0;
        if (ring->gpu_caches_dirty)
                flush_domains = I915_GEM_GPU_DOMAINS;

        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
        if (ret)
                return ret;

        trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);

        ring->gpu_caches_dirty = false;
        return 0;
}
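
/*
 * The two helpers above are the high level entry points typically used
 * around batch submission: flush_all_caches() pushes out anything the GPU
 * has written (flush_domains = I915_GEM_GPU_DOMAINS) once a request has
 * dirtied the caches, while invalidate_all_caches() additionally
 * invalidates the read caches before the next batch runs, folding in a
 * write flush only if gpu_caches_dirty is still set. Both clear
 * gpu_caches_dirty on success so redundant flush commands are not emitted.
 */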

void
intel_stop_ring_buffer(struct intel_engine_cs *ring)
{
        int ret;

        if (!intel_ring_initialized(ring))
                return;

        ret = intel_ring_idle(ring);
        if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
                DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
                          ring->name, ret);

        stop_ring(ring);
}
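
/*
 * intel_stop_ring_buffer() is the final quiesce step before a ring is torn
 * down (e.g. on suspend or driver unload): it waits for the engine to go
 * idle and then disables the ring via stop_ring(). A failure to idle is
 * only reported when no GPU reset is pending, since a reset already
 * implies the rings will be cleaned up and reinitialized anyway.
 */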