1 /* 2 * Copyright © 2008-2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Eric Anholt <eric@anholt.net> 25 * Zou Nan hai <nanhai.zou@intel.com> 26 * Xiang Hai hao<haihao.xiang@intel.com> 27 * 28 */ 29 30 #include <drm/drmP.h> 31 #include "i915_drv.h" 32 #include <drm/i915_drm.h> 33 #include "i915_trace.h" 34 #include "intel_drv.h" 35 36 static inline int ring_space(struct intel_ring_buffer *ring) 37 { 38 int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE); 39 if (space < 0) 40 space += ring->size; 41 return space; 42 } 43 44 void __intel_ring_advance(struct intel_ring_buffer *ring) 45 { 46 struct drm_i915_private *dev_priv = ring->dev->dev_private; 47 48 ring->tail &= ring->size - 1; 49 if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring)) 50 return; 51 ring->write_tail(ring, ring->tail); 52 } 53 54 static int 55 gen2_render_ring_flush(struct intel_ring_buffer *ring, 56 u32 invalidate_domains, 57 u32 flush_domains) 58 { 59 u32 cmd; 60 int ret; 61 62 cmd = MI_FLUSH; 63 if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) 64 cmd |= MI_NO_WRITE_FLUSH; 65 66 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) 67 cmd |= MI_READ_FLUSH; 68 69 ret = intel_ring_begin(ring, 2); 70 if (ret) 71 return ret; 72 73 intel_ring_emit(ring, cmd); 74 intel_ring_emit(ring, MI_NOOP); 75 intel_ring_advance(ring); 76 77 return 0; 78 } 79 80 static int 81 gen4_render_ring_flush(struct intel_ring_buffer *ring, 82 u32 invalidate_domains, 83 u32 flush_domains) 84 { 85 struct drm_device *dev = ring->dev; 86 u32 cmd; 87 int ret; 88 89 /* 90 * read/write caches: 91 * 92 * I915_GEM_DOMAIN_RENDER is always invalidated, but is 93 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is 94 * also flushed at 2d versus 3d pipeline switches. 95 * 96 * read-only caches: 97 * 98 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if 99 * MI_READ_FLUSH is set, and is always flushed on 965. 100 * 101 * I915_GEM_DOMAIN_COMMAND may not exist? 102 * 103 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is 104 * invalidated when MI_EXE_FLUSH is set. 105 * 106 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is 107 * invalidated with every MI_FLUSH. 108 * 109 * TLBs: 110 * 111 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND 112 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and 113 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER 114 * are flushed at any MI_FLUSH. 115 */ 116 117 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; 118 if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) 119 cmd &= ~MI_NO_WRITE_FLUSH; 120 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) 121 cmd |= MI_EXE_FLUSH; 122 123 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && 124 (IS_G4X(dev) || IS_GEN5(dev))) 125 cmd |= MI_INVALIDATE_ISP; 126 127 ret = intel_ring_begin(ring, 2); 128 if (ret) 129 return ret; 130 131 intel_ring_emit(ring, cmd); 132 intel_ring_emit(ring, MI_NOOP); 133 intel_ring_advance(ring); 134 135 return 0; 136 } 137 138 /** 139 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for 140 * implementing two workarounds on gen6. From section 1.4.7.1 141 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: 142 * 143 * [DevSNB-C+{W/A}] Before any depth stall flush (including those 144 * produced by non-pipelined state commands), software needs to first 145 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 146 * 0. 147 * 148 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable 149 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 150 * 151 * And the workaround for these two requires this workaround first: 152 * 153 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 154 * BEFORE the pipe-control with a post-sync op and no write-cache 155 * flushes. 156 * 157 * And this last workaround is tricky because of the requirements on 158 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM 159 * volume 2 part 1: 160 * 161 * "1 of the following must also be set: 162 * - Render Target Cache Flush Enable ([12] of DW1) 163 * - Depth Cache Flush Enable ([0] of DW1) 164 * - Stall at Pixel Scoreboard ([1] of DW1) 165 * - Depth Stall ([13] of DW1) 166 * - Post-Sync Operation ([13] of DW1) 167 * - Notify Enable ([8] of DW1)" 168 * 169 * The cache flushes require the workaround flush that triggered this 170 * one, so we can't use it. Depth stall would trigger the same. 171 * Post-sync nonzero is what triggered this second workaround, so we 172 * can't use that one either. Notify enable is IRQs, which aren't 173 * really our business. That leaves only stall at scoreboard. 174 */ 175 static int 176 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring) 177 { 178 u32 scratch_addr = ring->scratch.gtt_offset + 128; 179 int ret; 180 181 182 ret = intel_ring_begin(ring, 6); 183 if (ret) 184 return ret; 185 186 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); 187 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | 188 PIPE_CONTROL_STALL_AT_SCOREBOARD); 189 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 190 intel_ring_emit(ring, 0); /* low dword */ 191 intel_ring_emit(ring, 0); /* high dword */ 192 intel_ring_emit(ring, MI_NOOP); 193 intel_ring_advance(ring); 194 195 ret = intel_ring_begin(ring, 6); 196 if (ret) 197 return ret; 198 199 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); 200 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); 201 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ 202 intel_ring_emit(ring, 0); 203 intel_ring_emit(ring, 0); 204 intel_ring_emit(ring, MI_NOOP); 205 intel_ring_advance(ring); 206 207 return 0; 208 } 209 210 static int 211 gen6_render_ring_flush(struct intel_ring_buffer *ring, 212 u32 invalidate_domains, u32 flush_domains) 213 { 214 u32 flags = 0; 215 u32 scratch_addr = ring->scratch.gtt_offset + 128; 216 int ret; 217 218 /* Force SNB workarounds for PIPE_CONTROL flushes */ 219 ret = intel_emit_post_sync_nonzero_flush(ring); 220 if (ret) 221 return ret; 222 223 /* Just flush everything. Experiments have shown that reducing the 224 * number of bits based on the write domains has little performance 225 * impact. 226 */ 227 if (flush_domains) { 228 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 229 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 230 /* 231 * Ensure that any following seqno writes only happen 232 * when the render cache is indeed flushed. 233 */ 234 flags |= PIPE_CONTROL_CS_STALL; 235 } 236 if (invalidate_domains) { 237 flags |= PIPE_CONTROL_TLB_INVALIDATE; 238 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 239 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 240 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 241 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 242 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 243 /* 244 * TLB invalidate requires a post-sync write. 245 */ 246 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; 247 } 248 249 ret = intel_ring_begin(ring, 4); 250 if (ret) 251 return ret; 252 253 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); 254 intel_ring_emit(ring, flags); 255 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); 256 intel_ring_emit(ring, 0); 257 intel_ring_advance(ring); 258 259 return 0; 260 } 261 262 static int 263 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) 264 { 265 int ret; 266 267 ret = intel_ring_begin(ring, 4); 268 if (ret) 269 return ret; 270 271 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); 272 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | 273 PIPE_CONTROL_STALL_AT_SCOREBOARD); 274 intel_ring_emit(ring, 0); 275 intel_ring_emit(ring, 0); 276 intel_ring_advance(ring); 277 278 return 0; 279 } 280 281 static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value) 282 { 283 int ret; 284 285 if (!ring->fbc_dirty) 286 return 0; 287 288 ret = intel_ring_begin(ring, 6); 289 if (ret) 290 return ret; 291 /* WaFbcNukeOn3DBlt:ivb/hsw */ 292 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 293 intel_ring_emit(ring, MSG_FBC_REND_STATE); 294 intel_ring_emit(ring, value); 295 intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT); 296 intel_ring_emit(ring, MSG_FBC_REND_STATE); 297 intel_ring_emit(ring, ring->scratch.gtt_offset + 256); 298 intel_ring_advance(ring); 299 300 ring->fbc_dirty = false; 301 return 0; 302 } 303 304 static int 305 gen7_render_ring_flush(struct intel_ring_buffer *ring, 306 u32 invalidate_domains, u32 flush_domains) 307 { 308 u32 flags = 0; 309 u32 scratch_addr = ring->scratch.gtt_offset + 128; 310 int ret; 311 312 /* 313 * Ensure that any following seqno writes only happen when the render 314 * cache is indeed flushed. 315 * 316 * Workaround: 4th PIPE_CONTROL command (except the ones with only 317 * read-cache invalidate bits set) must have the CS_STALL bit set. We 318 * don't try to be clever and just set it unconditionally. 319 */ 320 flags |= PIPE_CONTROL_CS_STALL; 321 322 /* Just flush everything. Experiments have shown that reducing the 323 * number of bits based on the write domains has little performance 324 * impact. 325 */ 326 if (flush_domains) { 327 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 328 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 329 } 330 if (invalidate_domains) { 331 flags |= PIPE_CONTROL_TLB_INVALIDATE; 332 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 333 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 334 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 335 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 336 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 337 /* 338 * TLB invalidate requires a post-sync write. 339 */ 340 flags |= PIPE_CONTROL_QW_WRITE; 341 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 342 343 /* Workaround: we must issue a pipe_control with CS-stall bit 344 * set before a pipe_control command that has the state cache 345 * invalidate bit set. */ 346 gen7_render_ring_cs_stall_wa(ring); 347 } 348 349 ret = intel_ring_begin(ring, 4); 350 if (ret) 351 return ret; 352 353 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); 354 intel_ring_emit(ring, flags); 355 intel_ring_emit(ring, scratch_addr); 356 intel_ring_emit(ring, 0); 357 intel_ring_advance(ring); 358 359 if (!invalidate_domains && flush_domains) 360 return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); 361 362 return 0; 363 } 364 365 static int 366 gen8_render_ring_flush(struct intel_ring_buffer *ring, 367 u32 invalidate_domains, u32 flush_domains) 368 { 369 u32 flags = 0; 370 u32 scratch_addr = ring->scratch.gtt_offset + 128; 371 int ret; 372 373 flags |= PIPE_CONTROL_CS_STALL; 374 375 if (flush_domains) { 376 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; 377 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; 378 } 379 if (invalidate_domains) { 380 flags |= PIPE_CONTROL_TLB_INVALIDATE; 381 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; 382 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; 383 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; 384 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; 385 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; 386 flags |= PIPE_CONTROL_QW_WRITE; 387 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; 388 } 389 390 ret = intel_ring_begin(ring, 6); 391 if (ret) 392 return ret; 393 394 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); 395 intel_ring_emit(ring, flags); 396 intel_ring_emit(ring, scratch_addr); 397 intel_ring_emit(ring, 0); 398 intel_ring_emit(ring, 0); 399 intel_ring_emit(ring, 0); 400 intel_ring_advance(ring); 401 402 return 0; 403 404 } 405 406 static void ring_write_tail(struct intel_ring_buffer *ring, 407 u32 value) 408 { 409 drm_i915_private_t *dev_priv = ring->dev->dev_private; 410 I915_WRITE_TAIL(ring, value); 411 } 412 413 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring) 414 { 415 drm_i915_private_t *dev_priv = ring->dev->dev_private; 416 u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ? 417 RING_ACTHD(ring->mmio_base) : ACTHD; 418 419 return I915_READ(acthd_reg); 420 } 421 422 static void ring_setup_phys_status_page(struct intel_ring_buffer *ring) 423 { 424 struct drm_i915_private *dev_priv = ring->dev->dev_private; 425 u32 addr; 426 427 addr = dev_priv->status_page_dmah->busaddr; 428 if (INTEL_INFO(ring->dev)->gen >= 4) 429 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; 430 I915_WRITE(HWS_PGA, addr); 431 } 432 433 static int init_ring_common(struct intel_ring_buffer *ring) 434 { 435 struct drm_device *dev = ring->dev; 436 drm_i915_private_t *dev_priv = dev->dev_private; 437 struct drm_i915_gem_object *obj = ring->obj; 438 int ret = 0; 439 u32 head; 440 441 gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); 442 443 if (I915_NEED_GFX_HWS(dev)) 444 intel_ring_setup_status_page(ring); 445 else 446 ring_setup_phys_status_page(ring); 447 448 /* Stop the ring if it's running. */ 449 I915_WRITE_CTL(ring, 0); 450 I915_WRITE_HEAD(ring, 0); 451 ring->write_tail(ring, 0); 452 453 head = I915_READ_HEAD(ring) & HEAD_ADDR; 454 455 /* G45 ring initialization fails to reset head to zero */ 456 if (head != 0) { 457 DRM_DEBUG_KMS("%s head not reset to zero " 458 "ctl %08x head %08x tail %08x start %08x\n", 459 ring->name, 460 I915_READ_CTL(ring), 461 I915_READ_HEAD(ring), 462 I915_READ_TAIL(ring), 463 I915_READ_START(ring)); 464 465 I915_WRITE_HEAD(ring, 0); 466 467 if (I915_READ_HEAD(ring) & HEAD_ADDR) { 468 DRM_ERROR("failed to set %s head to zero " 469 "ctl %08x head %08x tail %08x start %08x\n", 470 ring->name, 471 I915_READ_CTL(ring), 472 I915_READ_HEAD(ring), 473 I915_READ_TAIL(ring), 474 I915_READ_START(ring)); 475 } 476 } 477 478 /* Initialize the ring. This must happen _after_ we've cleared the ring 479 * registers with the above sequence (the readback of the HEAD registers 480 * also enforces ordering), otherwise the hw might lose the new ring 481 * register values. */ 482 I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj)); 483 I915_WRITE_CTL(ring, 484 ((ring->size - PAGE_SIZE) & RING_NR_PAGES) 485 | RING_VALID); 486 487 /* If the head is still not zero, the ring is dead */ 488 if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 && 489 I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) && 490 (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) { 491 DRM_ERROR("%s initialization failed " 492 "ctl %08x head %08x tail %08x start %08x\n", 493 ring->name, 494 I915_READ_CTL(ring), 495 I915_READ_HEAD(ring), 496 I915_READ_TAIL(ring), 497 I915_READ_START(ring)); 498 ret = -EIO; 499 goto out; 500 } 501 502 if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) 503 i915_kernel_lost_context(ring->dev); 504 else { 505 ring->head = I915_READ_HEAD(ring); 506 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; 507 ring->space = ring_space(ring); 508 ring->last_retired_head = -1; 509 } 510 511 memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); 512 513 out: 514 gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); 515 516 return ret; 517 } 518 519 static int 520 init_pipe_control(struct intel_ring_buffer *ring) 521 { 522 int ret; 523 524 if (ring->scratch.obj) 525 return 0; 526 527 ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096); 528 if (ring->scratch.obj == NULL) { 529 DRM_ERROR("Failed to allocate seqno page\n"); 530 ret = -ENOMEM; 531 goto err; 532 } 533 534 i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC); 535 536 ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false); 537 if (ret) 538 goto err_unref; 539 540 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj); 541 ring->scratch.cpu_page = kmap(ring->scratch.obj->pages[0]); 542 if (ring->scratch.cpu_page == NULL) { 543 ret = -ENOMEM; 544 goto err_unpin; 545 } 546 547 DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", 548 ring->name, ring->scratch.gtt_offset); 549 return 0; 550 551 err_unpin: 552 i915_gem_object_unpin(ring->scratch.obj); 553 err_unref: 554 drm_gem_object_unreference(&ring->scratch.obj->base); 555 err: 556 return ret; 557 } 558 559 static int init_render_ring(struct intel_ring_buffer *ring) 560 { 561 struct drm_device *dev = ring->dev; 562 struct drm_i915_private *dev_priv = dev->dev_private; 563 int ret = init_ring_common(ring); 564 565 if (INTEL_INFO(dev)->gen > 3) 566 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); 567 568 /* We need to disable the AsyncFlip performance optimisations in order 569 * to use MI_WAIT_FOR_EVENT within the CS. It should already be 570 * programmed to '1' on all products. 571 * 572 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 573 */ 574 if (INTEL_INFO(dev)->gen >= 6) 575 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); 576 577 /* Required for the hardware to program scanline values for waiting */ 578 if (INTEL_INFO(dev)->gen == 6) 579 I915_WRITE(GFX_MODE, 580 _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS)); 581 582 if (IS_GEN7(dev)) 583 I915_WRITE(GFX_MODE_GEN7, 584 _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | 585 _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); 586 587 if (INTEL_INFO(dev)->gen >= 5) { 588 ret = init_pipe_control(ring); 589 if (ret) 590 return ret; 591 } 592 593 if (IS_GEN6(dev)) { 594 /* From the Sandybridge PRM, volume 1 part 3, page 24: 595 * "If this bit is set, STCunit will have LRA as replacement 596 * policy. [...] This bit must be reset. LRA replacement 597 * policy is not supported." 598 */ 599 I915_WRITE(CACHE_MODE_0, 600 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 601 602 /* This is not explicitly set for GEN6, so read the register. 603 * see intel_ring_mi_set_context() for why we care. 604 * TODO: consider explicitly setting the bit for GEN5 605 */ 606 ring->itlb_before_ctx_switch = 607 !!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS); 608 } 609 610 if (INTEL_INFO(dev)->gen >= 6) 611 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); 612 613 if (HAS_L3_DPF(dev)) 614 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); 615 616 return ret; 617 } 618 619 static void render_ring_cleanup(struct intel_ring_buffer *ring) 620 { 621 struct drm_device *dev = ring->dev; 622 623 if (ring->scratch.obj == NULL) 624 return; 625 626 if (INTEL_INFO(dev)->gen >= 5) { 627 kunmap(ring->scratch.obj->pages[0]); 628 i915_gem_object_unpin(ring->scratch.obj); 629 } 630 631 drm_gem_object_unreference(&ring->scratch.obj->base); 632 ring->scratch.obj = NULL; 633 } 634 635 static void 636 update_mboxes(struct intel_ring_buffer *ring, 637 u32 mmio_offset) 638 { 639 /* NB: In order to be able to do semaphore MBOX updates for varying number 640 * of rings, it's easiest if we round up each individual update to a 641 * multiple of 2 (since ring updates must always be a multiple of 2) 642 * even though the actual update only requires 3 dwords. 643 */ 644 #define MBOX_UPDATE_DWORDS 4 645 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 646 intel_ring_emit(ring, mmio_offset); 647 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 648 intel_ring_emit(ring, MI_NOOP); 649 } 650 651 /** 652 * gen6_add_request - Update the semaphore mailbox registers 653 * 654 * @ring - ring that is adding a request 655 * @seqno - return seqno stuck into the ring 656 * 657 * Update the mailbox registers in the *other* rings with the current seqno. 658 * This acts like a signal in the canonical semaphore. 659 */ 660 static int 661 gen6_add_request(struct intel_ring_buffer *ring) 662 { 663 struct drm_device *dev = ring->dev; 664 struct drm_i915_private *dev_priv = dev->dev_private; 665 struct intel_ring_buffer *useless; 666 int i, ret, num_dwords = 4; 667 668 if (i915_semaphore_is_enabled(dev)) 669 num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS); 670 #undef MBOX_UPDATE_DWORDS 671 672 ret = intel_ring_begin(ring, num_dwords); 673 if (ret) 674 return ret; 675 676 if (i915_semaphore_is_enabled(dev)) { 677 for_each_ring(useless, dev_priv, i) { 678 u32 mbox_reg = ring->signal_mbox[i]; 679 if (mbox_reg != GEN6_NOSYNC) 680 update_mboxes(ring, mbox_reg); 681 } 682 } 683 684 intel_ring_emit(ring, MI_STORE_DWORD_INDEX); 685 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 686 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 687 intel_ring_emit(ring, MI_USER_INTERRUPT); 688 __intel_ring_advance(ring); 689 690 return 0; 691 } 692 693 static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, 694 u32 seqno) 695 { 696 struct drm_i915_private *dev_priv = dev->dev_private; 697 return dev_priv->last_seqno < seqno; 698 } 699 700 /** 701 * intel_ring_sync - sync the waiter to the signaller on seqno 702 * 703 * @waiter - ring that is waiting 704 * @signaller - ring which has, or will signal 705 * @seqno - seqno which the waiter will block on 706 */ 707 static int 708 gen6_ring_sync(struct intel_ring_buffer *waiter, 709 struct intel_ring_buffer *signaller, 710 u32 seqno) 711 { 712 int ret; 713 u32 dw1 = MI_SEMAPHORE_MBOX | 714 MI_SEMAPHORE_COMPARE | 715 MI_SEMAPHORE_REGISTER; 716 717 /* Throughout all of the GEM code, seqno passed implies our current 718 * seqno is >= the last seqno executed. However for hardware the 719 * comparison is strictly greater than. 720 */ 721 seqno -= 1; 722 723 WARN_ON(signaller->semaphore_register[waiter->id] == 724 MI_SEMAPHORE_SYNC_INVALID); 725 726 ret = intel_ring_begin(waiter, 4); 727 if (ret) 728 return ret; 729 730 /* If seqno wrap happened, omit the wait with no-ops */ 731 if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) { 732 intel_ring_emit(waiter, 733 dw1 | 734 signaller->semaphore_register[waiter->id]); 735 intel_ring_emit(waiter, seqno); 736 intel_ring_emit(waiter, 0); 737 intel_ring_emit(waiter, MI_NOOP); 738 } else { 739 intel_ring_emit(waiter, MI_NOOP); 740 intel_ring_emit(waiter, MI_NOOP); 741 intel_ring_emit(waiter, MI_NOOP); 742 intel_ring_emit(waiter, MI_NOOP); 743 } 744 intel_ring_advance(waiter); 745 746 return 0; 747 } 748 749 #define PIPE_CONTROL_FLUSH(ring__, addr__) \ 750 do { \ 751 intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \ 752 PIPE_CONTROL_DEPTH_STALL); \ 753 intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ 754 intel_ring_emit(ring__, 0); \ 755 intel_ring_emit(ring__, 0); \ 756 } while (0) 757 758 static int 759 pc_render_add_request(struct intel_ring_buffer *ring) 760 { 761 u32 scratch_addr = ring->scratch.gtt_offset + 128; 762 int ret; 763 764 /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently 765 * incoherent with writes to memory, i.e. completely fubar, 766 * so we need to use PIPE_NOTIFY instead. 767 * 768 * However, we also need to workaround the qword write 769 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to 770 * memory before requesting an interrupt. 771 */ 772 ret = intel_ring_begin(ring, 32); 773 if (ret) 774 return ret; 775 776 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | 777 PIPE_CONTROL_WRITE_FLUSH | 778 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 779 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); 780 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 781 intel_ring_emit(ring, 0); 782 PIPE_CONTROL_FLUSH(ring, scratch_addr); 783 scratch_addr += 128; /* write to separate cachelines */ 784 PIPE_CONTROL_FLUSH(ring, scratch_addr); 785 scratch_addr += 128; 786 PIPE_CONTROL_FLUSH(ring, scratch_addr); 787 scratch_addr += 128; 788 PIPE_CONTROL_FLUSH(ring, scratch_addr); 789 scratch_addr += 128; 790 PIPE_CONTROL_FLUSH(ring, scratch_addr); 791 scratch_addr += 128; 792 PIPE_CONTROL_FLUSH(ring, scratch_addr); 793 794 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | 795 PIPE_CONTROL_WRITE_FLUSH | 796 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 797 PIPE_CONTROL_NOTIFY); 798 intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); 799 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 800 intel_ring_emit(ring, 0); 801 __intel_ring_advance(ring); 802 803 return 0; 804 } 805 806 static u32 807 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) 808 { 809 /* Workaround to force correct ordering between irq and seqno writes on 810 * ivb (and maybe also on snb) by reading from a CS register (like 811 * ACTHD) before reading the status page. */ 812 if (!lazy_coherency) 813 intel_ring_get_active_head(ring); 814 return intel_read_status_page(ring, I915_GEM_HWS_INDEX); 815 } 816 817 static u32 818 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) 819 { 820 return intel_read_status_page(ring, I915_GEM_HWS_INDEX); 821 } 822 823 static void 824 ring_set_seqno(struct intel_ring_buffer *ring, u32 seqno) 825 { 826 intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); 827 } 828 829 static u32 830 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) 831 { 832 return ring->scratch.cpu_page[0]; 833 } 834 835 static void 836 pc_render_set_seqno(struct intel_ring_buffer *ring, u32 seqno) 837 { 838 ring->scratch.cpu_page[0] = seqno; 839 } 840 841 static bool 842 gen5_ring_get_irq(struct intel_ring_buffer *ring) 843 { 844 struct drm_device *dev = ring->dev; 845 drm_i915_private_t *dev_priv = dev->dev_private; 846 847 if (!dev->irq_enabled) 848 return false; 849 850 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 851 if (ring->irq_refcount++ == 0) 852 ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); 853 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 854 855 return true; 856 } 857 858 static void 859 gen5_ring_put_irq(struct intel_ring_buffer *ring) 860 { 861 struct drm_device *dev = ring->dev; 862 drm_i915_private_t *dev_priv = dev->dev_private; 863 864 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 865 if (--ring->irq_refcount == 0) 866 ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); 867 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 868 } 869 870 static bool 871 i9xx_ring_get_irq(struct intel_ring_buffer *ring) 872 { 873 struct drm_device *dev = ring->dev; 874 drm_i915_private_t *dev_priv = dev->dev_private; 875 876 if (!dev->irq_enabled) 877 return false; 878 879 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 880 if (ring->irq_refcount++ == 0) { 881 dev_priv->irq_mask &= ~ring->irq_enable_mask; 882 I915_WRITE(IMR, dev_priv->irq_mask); 883 POSTING_READ(IMR); 884 } 885 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 886 887 return true; 888 } 889 890 static void 891 i9xx_ring_put_irq(struct intel_ring_buffer *ring) 892 { 893 struct drm_device *dev = ring->dev; 894 drm_i915_private_t *dev_priv = dev->dev_private; 895 896 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 897 if (--ring->irq_refcount == 0) { 898 dev_priv->irq_mask |= ring->irq_enable_mask; 899 I915_WRITE(IMR, dev_priv->irq_mask); 900 POSTING_READ(IMR); 901 } 902 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 903 } 904 905 static bool 906 i8xx_ring_get_irq(struct intel_ring_buffer *ring) 907 { 908 struct drm_device *dev = ring->dev; 909 drm_i915_private_t *dev_priv = dev->dev_private; 910 911 if (!dev->irq_enabled) 912 return false; 913 914 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 915 if (ring->irq_refcount++ == 0) { 916 dev_priv->irq_mask &= ~ring->irq_enable_mask; 917 I915_WRITE16(IMR, dev_priv->irq_mask); 918 POSTING_READ16(IMR); 919 } 920 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 921 922 return true; 923 } 924 925 static void 926 i8xx_ring_put_irq(struct intel_ring_buffer *ring) 927 { 928 struct drm_device *dev = ring->dev; 929 drm_i915_private_t *dev_priv = dev->dev_private; 930 931 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 932 if (--ring->irq_refcount == 0) { 933 dev_priv->irq_mask |= ring->irq_enable_mask; 934 I915_WRITE16(IMR, dev_priv->irq_mask); 935 POSTING_READ16(IMR); 936 } 937 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 938 } 939 940 void intel_ring_setup_status_page(struct intel_ring_buffer *ring) 941 { 942 struct drm_device *dev = ring->dev; 943 drm_i915_private_t *dev_priv = ring->dev->dev_private; 944 u32 mmio = 0; 945 946 /* The ring status page addresses are no longer next to the rest of 947 * the ring registers as of gen7. 948 */ 949 if (IS_GEN7(dev)) { 950 switch (ring->id) { 951 case RCS: 952 mmio = RENDER_HWS_PGA_GEN7; 953 break; 954 case BCS: 955 mmio = BLT_HWS_PGA_GEN7; 956 break; 957 case VCS: 958 mmio = BSD_HWS_PGA_GEN7; 959 break; 960 case VECS: 961 mmio = VEBOX_HWS_PGA_GEN7; 962 break; 963 } 964 } else if (IS_GEN6(ring->dev)) { 965 mmio = RING_HWS_PGA_GEN6(ring->mmio_base); 966 } else { 967 /* XXX: gen8 returns to sanity */ 968 mmio = RING_HWS_PGA(ring->mmio_base); 969 } 970 971 I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); 972 POSTING_READ(mmio); 973 974 /* Flush the TLB for this page */ 975 if (INTEL_INFO(dev)->gen >= 6) { 976 u32 reg = RING_INSTPM(ring->mmio_base); 977 I915_WRITE(reg, 978 _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | 979 INSTPM_SYNC_FLUSH)); 980 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, 981 1000)) 982 DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", 983 ring->name); 984 } 985 } 986 987 static int 988 bsd_ring_flush(struct intel_ring_buffer *ring, 989 u32 invalidate_domains, 990 u32 flush_domains) 991 { 992 int ret; 993 994 ret = intel_ring_begin(ring, 2); 995 if (ret) 996 return ret; 997 998 intel_ring_emit(ring, MI_FLUSH); 999 intel_ring_emit(ring, MI_NOOP); 1000 intel_ring_advance(ring); 1001 return 0; 1002 } 1003 1004 static int 1005 i9xx_add_request(struct intel_ring_buffer *ring) 1006 { 1007 int ret; 1008 1009 ret = intel_ring_begin(ring, 4); 1010 if (ret) 1011 return ret; 1012 1013 intel_ring_emit(ring, MI_STORE_DWORD_INDEX); 1014 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 1015 intel_ring_emit(ring, ring->outstanding_lazy_seqno); 1016 intel_ring_emit(ring, MI_USER_INTERRUPT); 1017 __intel_ring_advance(ring); 1018 1019 return 0; 1020 } 1021 1022 static bool 1023 gen6_ring_get_irq(struct intel_ring_buffer *ring) 1024 { 1025 struct drm_device *dev = ring->dev; 1026 drm_i915_private_t *dev_priv = dev->dev_private; 1027 1028 if (!dev->irq_enabled) 1029 return false; 1030 1031 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1032 if (ring->irq_refcount++ == 0) { 1033 if (HAS_L3_DPF(dev) && ring->id == RCS) 1034 I915_WRITE_IMR(ring, 1035 ~(ring->irq_enable_mask | 1036 GT_PARITY_ERROR(dev))); 1037 else 1038 I915_WRITE_IMR(ring, ~ring->irq_enable_mask); 1039 ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); 1040 } 1041 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1042 1043 return true; 1044 } 1045 1046 static void 1047 gen6_ring_put_irq(struct intel_ring_buffer *ring) 1048 { 1049 struct drm_device *dev = ring->dev; 1050 drm_i915_private_t *dev_priv = dev->dev_private; 1051 1052 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1053 if (--ring->irq_refcount == 0) { 1054 if (HAS_L3_DPF(dev) && ring->id == RCS) 1055 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); 1056 else 1057 I915_WRITE_IMR(ring, ~0); 1058 ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); 1059 } 1060 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1061 } 1062 1063 static bool 1064 hsw_vebox_get_irq(struct intel_ring_buffer *ring) 1065 { 1066 struct drm_device *dev = ring->dev; 1067 struct drm_i915_private *dev_priv = dev->dev_private; 1068 1069 if (!dev->irq_enabled) 1070 return false; 1071 1072 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1073 if (ring->irq_refcount++ == 0) { 1074 I915_WRITE_IMR(ring, ~ring->irq_enable_mask); 1075 snb_enable_pm_irq(dev_priv, ring->irq_enable_mask); 1076 } 1077 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1078 1079 return true; 1080 } 1081 1082 static void 1083 hsw_vebox_put_irq(struct intel_ring_buffer *ring) 1084 { 1085 struct drm_device *dev = ring->dev; 1086 struct drm_i915_private *dev_priv = dev->dev_private; 1087 1088 if (!dev->irq_enabled) 1089 return; 1090 1091 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1092 if (--ring->irq_refcount == 0) { 1093 I915_WRITE_IMR(ring, ~0); 1094 snb_disable_pm_irq(dev_priv, ring->irq_enable_mask); 1095 } 1096 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1097 } 1098 1099 static bool 1100 gen8_ring_get_irq(struct intel_ring_buffer *ring) 1101 { 1102 struct drm_device *dev = ring->dev; 1103 struct drm_i915_private *dev_priv = dev->dev_private; 1104 1105 if (!dev->irq_enabled) 1106 return false; 1107 1108 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1109 if (ring->irq_refcount++ == 0) { 1110 if (HAS_L3_DPF(dev) && ring->id == RCS) { 1111 I915_WRITE_IMR(ring, 1112 ~(ring->irq_enable_mask | 1113 GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); 1114 } else { 1115 I915_WRITE_IMR(ring, ~ring->irq_enable_mask); 1116 } 1117 POSTING_READ(RING_IMR(ring->mmio_base)); 1118 } 1119 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1120 1121 return true; 1122 } 1123 1124 static void 1125 gen8_ring_put_irq(struct intel_ring_buffer *ring) 1126 { 1127 struct drm_device *dev = ring->dev; 1128 struct drm_i915_private *dev_priv = dev->dev_private; 1129 1130 lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE); 1131 if (--ring->irq_refcount == 0) { 1132 if (HAS_L3_DPF(dev) && ring->id == RCS) { 1133 I915_WRITE_IMR(ring, 1134 ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); 1135 } else { 1136 I915_WRITE_IMR(ring, ~0); 1137 } 1138 POSTING_READ(RING_IMR(ring->mmio_base)); 1139 } 1140 lockmgr(&dev_priv->irq_lock, LK_RELEASE); 1141 } 1142 1143 static int 1144 i965_dispatch_execbuffer(struct intel_ring_buffer *ring, 1145 u32 offset, u32 length, 1146 unsigned flags) 1147 { 1148 int ret; 1149 1150 ret = intel_ring_begin(ring, 2); 1151 if (ret) 1152 return ret; 1153 1154 intel_ring_emit(ring, 1155 MI_BATCH_BUFFER_START | 1156 MI_BATCH_GTT | 1157 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); 1158 intel_ring_emit(ring, offset); 1159 intel_ring_advance(ring); 1160 1161 return 0; 1162 } 1163 1164 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */ 1165 #define I830_BATCH_LIMIT (256*1024) 1166 static int 1167 i830_dispatch_execbuffer(struct intel_ring_buffer *ring, 1168 u32 offset, u32 len, 1169 unsigned flags) 1170 { 1171 int ret; 1172 1173 if (flags & I915_DISPATCH_PINNED) { 1174 ret = intel_ring_begin(ring, 4); 1175 if (ret) 1176 return ret; 1177 1178 intel_ring_emit(ring, MI_BATCH_BUFFER); 1179 intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); 1180 intel_ring_emit(ring, offset + len - 8); 1181 intel_ring_emit(ring, MI_NOOP); 1182 intel_ring_advance(ring); 1183 } else { 1184 u32 cs_offset = ring->scratch.gtt_offset; 1185 1186 if (len > I830_BATCH_LIMIT) 1187 return -ENOSPC; 1188 1189 ret = intel_ring_begin(ring, 9+3); 1190 if (ret) 1191 return ret; 1192 /* Blit the batch (which has now all relocs applied) to the stable batch 1193 * scratch bo area (so that the CS never stumbles over its tlb 1194 * invalidation bug) ... */ 1195 intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | 1196 XY_SRC_COPY_BLT_WRITE_ALPHA | 1197 XY_SRC_COPY_BLT_WRITE_RGB); 1198 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); 1199 intel_ring_emit(ring, 0); 1200 intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); 1201 intel_ring_emit(ring, cs_offset); 1202 intel_ring_emit(ring, 0); 1203 intel_ring_emit(ring, 4096); 1204 intel_ring_emit(ring, offset); 1205 intel_ring_emit(ring, MI_FLUSH); 1206 1207 /* ... and execute it. */ 1208 intel_ring_emit(ring, MI_BATCH_BUFFER); 1209 intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); 1210 intel_ring_emit(ring, cs_offset + len - 8); 1211 intel_ring_advance(ring); 1212 } 1213 1214 return 0; 1215 } 1216 1217 static int 1218 i915_dispatch_execbuffer(struct intel_ring_buffer *ring, 1219 u32 offset, u32 len, 1220 unsigned flags) 1221 { 1222 int ret; 1223 1224 ret = intel_ring_begin(ring, 2); 1225 if (ret) 1226 return ret; 1227 1228 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1229 intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); 1230 intel_ring_advance(ring); 1231 1232 return 0; 1233 } 1234 1235 static void cleanup_status_page(struct intel_ring_buffer *ring) 1236 { 1237 struct drm_i915_gem_object *obj; 1238 1239 obj = ring->status_page.obj; 1240 if (obj == NULL) 1241 return; 1242 1243 kunmap(obj->pages[0]); 1244 i915_gem_object_unpin(obj); 1245 drm_gem_object_unreference(&obj->base); 1246 ring->status_page.obj = NULL; 1247 } 1248 1249 static int init_status_page(struct intel_ring_buffer *ring) 1250 { 1251 struct drm_device *dev = ring->dev; 1252 struct drm_i915_gem_object *obj; 1253 int ret; 1254 1255 obj = i915_gem_alloc_object(dev, 4096); 1256 if (obj == NULL) { 1257 DRM_ERROR("Failed to allocate status page\n"); 1258 ret = -ENOMEM; 1259 goto err; 1260 } 1261 1262 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); 1263 1264 ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false); 1265 if (ret != 0) { 1266 goto err_unref; 1267 } 1268 1269 ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); 1270 ring->status_page.page_addr = kmap(obj->pages[0]); 1271 if (ring->status_page.page_addr == NULL) { 1272 ret = -ENOMEM; 1273 goto err_unpin; 1274 } 1275 ring->status_page.obj = obj; 1276 memset(ring->status_page.page_addr, 0, PAGE_SIZE); 1277 1278 DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", 1279 ring->name, ring->status_page.gfx_addr); 1280 1281 return 0; 1282 1283 err_unpin: 1284 i915_gem_object_unpin(obj); 1285 err_unref: 1286 drm_gem_object_unreference(&obj->base); 1287 err: 1288 return ret; 1289 } 1290 1291 static int init_phys_status_page(struct intel_ring_buffer *ring) 1292 { 1293 struct drm_i915_private *dev_priv = ring->dev->dev_private; 1294 1295 if (!dev_priv->status_page_dmah) { 1296 dev_priv->status_page_dmah = 1297 drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE); 1298 if (!dev_priv->status_page_dmah) 1299 return -ENOMEM; 1300 } 1301 1302 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr; 1303 memset(ring->status_page.page_addr, 0, PAGE_SIZE); 1304 1305 return 0; 1306 } 1307 1308 static int intel_init_ring_buffer(struct drm_device *dev, 1309 struct intel_ring_buffer *ring) 1310 { 1311 struct drm_i915_gem_object *obj; 1312 int ret; 1313 1314 ring->dev = dev; 1315 INIT_LIST_HEAD(&ring->active_list); 1316 INIT_LIST_HEAD(&ring->request_list); 1317 ring->size = 32 * PAGE_SIZE; 1318 memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno)); 1319 1320 init_waitqueue_head(&ring->irq_queue); 1321 1322 if (I915_NEED_GFX_HWS(dev)) { 1323 ret = init_status_page(ring); 1324 if (ret) 1325 return ret; 1326 } else { 1327 BUG_ON(ring->id != RCS); 1328 ret = init_phys_status_page(ring); 1329 if (ret) 1330 return ret; 1331 } 1332 1333 obj = NULL; 1334 if (!HAS_LLC(dev)) 1335 obj = i915_gem_object_create_stolen(dev, ring->size); 1336 if (obj == NULL) 1337 obj = i915_gem_alloc_object(dev, ring->size); 1338 if (obj == NULL) { 1339 DRM_ERROR("Failed to allocate ringbuffer\n"); 1340 ret = -ENOMEM; 1341 goto err_hws; 1342 } 1343 1344 ring->obj = obj; 1345 1346 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false); 1347 if (ret) 1348 goto err_unref; 1349 1350 ret = i915_gem_object_set_to_gtt_domain(obj, true); 1351 if (ret) 1352 goto err_unpin; 1353 1354 ring->virtual_start = 1355 ioremap_wc(dev->agp->base + i915_gem_obj_ggtt_offset(obj), 1356 ring->size); 1357 if (ring->virtual_start == NULL) { 1358 DRM_ERROR("Failed to map ringbuffer.\n"); 1359 ret = -EINVAL; 1360 goto err_unpin; 1361 } 1362 1363 ret = ring->init(ring); 1364 if (ret) 1365 goto err_unmap; 1366 1367 /* Workaround an erratum on the i830 which causes a hang if 1368 * the TAIL pointer points to within the last 2 cachelines 1369 * of the buffer. 1370 */ 1371 ring->effective_size = ring->size; 1372 if (IS_I830(ring->dev) || IS_845G(ring->dev)) 1373 ring->effective_size -= 128; 1374 1375 return 0; 1376 1377 err_unmap: 1378 pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size); 1379 err_unpin: 1380 i915_gem_object_unpin(obj); 1381 err_unref: 1382 drm_gem_object_unreference(&obj->base); 1383 ring->obj = NULL; 1384 err_hws: 1385 cleanup_status_page(ring); 1386 return ret; 1387 } 1388 1389 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring) 1390 { 1391 struct drm_i915_private *dev_priv; 1392 int ret; 1393 1394 if (ring->obj == NULL) 1395 return; 1396 1397 /* Disable the ring buffer. The ring must be idle at this point */ 1398 dev_priv = ring->dev->dev_private; 1399 ret = intel_ring_idle(ring); 1400 if (ret && !i915_reset_in_progress(&dev_priv->gpu_error)) 1401 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", 1402 ring->name, ret); 1403 1404 I915_WRITE_CTL(ring, 0); 1405 1406 pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size); 1407 1408 i915_gem_object_unpin(ring->obj); 1409 drm_gem_object_unreference(&ring->obj->base); 1410 ring->obj = NULL; 1411 ring->preallocated_lazy_request = NULL; 1412 ring->outstanding_lazy_seqno = 0; 1413 1414 if (ring->cleanup) 1415 ring->cleanup(ring); 1416 1417 cleanup_status_page(ring); 1418 } 1419 1420 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno) 1421 { 1422 int ret; 1423 1424 ret = i915_wait_seqno(ring, seqno); 1425 if (!ret) 1426 i915_gem_retire_requests_ring(ring); 1427 1428 return ret; 1429 } 1430 1431 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) 1432 { 1433 struct drm_i915_gem_request *request; 1434 u32 seqno = 0; 1435 int ret; 1436 1437 i915_gem_retire_requests_ring(ring); 1438 1439 if (ring->last_retired_head != -1) { 1440 ring->head = ring->last_retired_head; 1441 ring->last_retired_head = -1; 1442 ring->space = ring_space(ring); 1443 if (ring->space >= n) 1444 return 0; 1445 } 1446 1447 list_for_each_entry(request, &ring->request_list, list) { 1448 int space; 1449 1450 if (request->tail == -1) 1451 continue; 1452 1453 space = request->tail - (ring->tail + I915_RING_FREE_SPACE); 1454 if (space < 0) 1455 space += ring->size; 1456 if (space >= n) { 1457 seqno = request->seqno; 1458 break; 1459 } 1460 1461 /* Consume this request in case we need more space than 1462 * is available and so need to prevent a race between 1463 * updating last_retired_head and direct reads of 1464 * I915_RING_HEAD. It also provides a nice sanity check. 1465 */ 1466 request->tail = -1; 1467 } 1468 1469 if (seqno == 0) 1470 return -ENOSPC; 1471 1472 ret = intel_ring_wait_seqno(ring, seqno); 1473 if (ret) 1474 return ret; 1475 1476 if (WARN_ON(ring->last_retired_head == -1)) 1477 return -ENOSPC; 1478 1479 ring->head = ring->last_retired_head; 1480 ring->last_retired_head = -1; 1481 ring->space = ring_space(ring); 1482 if (WARN_ON(ring->space < n)) 1483 return -ENOSPC; 1484 1485 return 0; 1486 } 1487 1488 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n) 1489 { 1490 struct drm_device *dev = ring->dev; 1491 struct drm_i915_private *dev_priv = dev->dev_private; 1492 unsigned long end; 1493 int ret; 1494 1495 ret = intel_ring_wait_request(ring, n); 1496 if (ret != -ENOSPC) 1497 return ret; 1498 1499 /* force the tail write in case we have been skipping them */ 1500 __intel_ring_advance(ring); 1501 1502 trace_i915_ring_wait_begin(ring); 1503 /* With GEM the hangcheck timer should kick us out of the loop, 1504 * leaving it early runs the risk of corrupting GEM state (due 1505 * to running on almost untested codepaths). But on resume 1506 * timers don't work yet, so prevent a complete hang in that 1507 * case by choosing an insanely large timeout. */ 1508 end = jiffies + 60 * HZ; 1509 1510 do { 1511 ring->head = I915_READ_HEAD(ring); 1512 ring->space = ring_space(ring); 1513 if (ring->space >= n) { 1514 trace_i915_ring_wait_end(ring); 1515 return 0; 1516 } 1517 1518 #if 0 1519 if (dev->primary->master) { 1520 struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; 1521 if (master_priv->sarea_priv) 1522 master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; 1523 } 1524 #else 1525 if (dev_priv->sarea_priv) 1526 dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT; 1527 #endif 1528 1529 msleep(1); 1530 1531 ret = i915_gem_check_wedge(&dev_priv->gpu_error, 1532 dev_priv->mm.interruptible); 1533 if (ret) 1534 return ret; 1535 } while (!time_after(jiffies, end)); 1536 trace_i915_ring_wait_end(ring); 1537 return -EBUSY; 1538 } 1539 1540 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) 1541 { 1542 uint32_t __iomem *virt; 1543 int rem = ring->size - ring->tail; 1544 1545 if (ring->space < rem) { 1546 int ret = ring_wait_for_space(ring, rem); 1547 if (ret) 1548 return ret; 1549 } 1550 1551 virt = (unsigned int *)((char *)ring->virtual_start + ring->tail); 1552 rem /= 4; 1553 while (rem--) 1554 iowrite32(MI_NOOP, virt++); 1555 1556 ring->tail = 0; 1557 ring->space = ring_space(ring); 1558 1559 return 0; 1560 } 1561 1562 int intel_ring_idle(struct intel_ring_buffer *ring) 1563 { 1564 u32 seqno; 1565 int ret; 1566 1567 /* We need to add any requests required to flush the objects and ring */ 1568 if (ring->outstanding_lazy_seqno) { 1569 ret = i915_add_request(ring, NULL); 1570 if (ret) 1571 return ret; 1572 } 1573 1574 /* Wait upon the last request to be completed */ 1575 if (list_empty(&ring->request_list)) 1576 return 0; 1577 1578 seqno = list_entry(ring->request_list.prev, 1579 struct drm_i915_gem_request, 1580 list)->seqno; 1581 1582 return i915_wait_seqno(ring, seqno); 1583 } 1584 1585 static int 1586 intel_ring_alloc_seqno(struct intel_ring_buffer *ring) 1587 { 1588 if (ring->outstanding_lazy_seqno) 1589 return 0; 1590 1591 if (ring->preallocated_lazy_request == NULL) { 1592 struct drm_i915_gem_request *request; 1593 1594 request = kmalloc(sizeof(*request), M_DRM, M_WAITOK); 1595 if (request == NULL) 1596 return -ENOMEM; 1597 1598 ring->preallocated_lazy_request = request; 1599 } 1600 1601 return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); 1602 } 1603 1604 static int __intel_ring_prepare(struct intel_ring_buffer *ring, 1605 int bytes) 1606 { 1607 int ret; 1608 1609 if (unlikely(ring->tail + bytes > ring->effective_size)) { 1610 ret = intel_wrap_ring_buffer(ring); 1611 if (unlikely(ret)) 1612 return ret; 1613 } 1614 1615 if (unlikely(ring->space < bytes)) { 1616 ret = ring_wait_for_space(ring, bytes); 1617 if (unlikely(ret)) 1618 return ret; 1619 } 1620 1621 return 0; 1622 } 1623 1624 int intel_ring_begin(struct intel_ring_buffer *ring, 1625 int num_dwords) 1626 { 1627 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1628 int ret; 1629 1630 ret = i915_gem_check_wedge(&dev_priv->gpu_error, 1631 dev_priv->mm.interruptible); 1632 if (ret) 1633 return ret; 1634 1635 ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t)); 1636 if (ret) 1637 return ret; 1638 1639 /* Preallocate the olr before touching the ring */ 1640 ret = intel_ring_alloc_seqno(ring); 1641 if (ret) 1642 return ret; 1643 1644 ring->space -= num_dwords * sizeof(uint32_t); 1645 return 0; 1646 } 1647 1648 /* Align the ring tail to a cacheline boundary */ 1649 int intel_ring_cacheline_align(struct intel_ring_buffer *ring) 1650 { 1651 int num_dwords = (64 - (ring->tail & 63)) / sizeof(uint32_t); 1652 int ret; 1653 1654 if (num_dwords == 0) 1655 return 0; 1656 1657 ret = intel_ring_begin(ring, num_dwords); 1658 if (ret) 1659 return ret; 1660 1661 while (num_dwords--) 1662 intel_ring_emit(ring, MI_NOOP); 1663 1664 intel_ring_advance(ring); 1665 1666 return 0; 1667 } 1668 1669 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno) 1670 { 1671 struct drm_i915_private *dev_priv = ring->dev->dev_private; 1672 1673 BUG_ON(ring->outstanding_lazy_seqno); 1674 1675 if (INTEL_INFO(ring->dev)->gen >= 6) { 1676 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); 1677 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); 1678 if (HAS_VEBOX(ring->dev)) 1679 I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); 1680 } 1681 1682 ring->set_seqno(ring, seqno); 1683 ring->hangcheck.seqno = seqno; 1684 } 1685 1686 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, 1687 u32 value) 1688 { 1689 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1690 1691 /* Every tail move must follow the sequence below */ 1692 1693 /* Disable notification that the ring is IDLE. The GT 1694 * will then assume that it is busy and bring it out of rc6. 1695 */ 1696 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, 1697 _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1698 1699 /* Clear the context id. Here be magic! */ 1700 I915_WRITE64(GEN6_BSD_RNCID, 0x0); 1701 1702 /* Wait for the ring not to be idle, i.e. for it to wake up. */ 1703 if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) & 1704 GEN6_BSD_SLEEP_INDICATOR) == 0, 1705 50)) 1706 DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); 1707 1708 /* Now that the ring is fully powered up, update the tail */ 1709 I915_WRITE_TAIL(ring, value); 1710 POSTING_READ(RING_TAIL(ring->mmio_base)); 1711 1712 /* Let the ring send IDLE messages to the GT again, 1713 * and so let it sleep to conserve power when idle. 1714 */ 1715 I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, 1716 _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); 1717 } 1718 1719 static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, 1720 u32 invalidate, u32 flush) 1721 { 1722 uint32_t cmd; 1723 int ret; 1724 1725 ret = intel_ring_begin(ring, 4); 1726 if (ret) 1727 return ret; 1728 1729 cmd = MI_FLUSH_DW; 1730 if (INTEL_INFO(ring->dev)->gen >= 8) 1731 cmd += 1; 1732 /* 1733 * Bspec vol 1c.5 - video engine command streamer: 1734 * "If ENABLED, all TLBs will be invalidated once the flush 1735 * operation is complete. This bit is only valid when the 1736 * Post-Sync Operation field is a value of 1h or 3h." 1737 */ 1738 if (invalidate & I915_GEM_GPU_DOMAINS) 1739 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | 1740 MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; 1741 intel_ring_emit(ring, cmd); 1742 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); 1743 if (INTEL_INFO(ring->dev)->gen >= 8) { 1744 intel_ring_emit(ring, 0); /* upper addr */ 1745 intel_ring_emit(ring, 0); /* value */ 1746 } else { 1747 intel_ring_emit(ring, 0); 1748 intel_ring_emit(ring, MI_NOOP); 1749 } 1750 intel_ring_advance(ring); 1751 return 0; 1752 } 1753 1754 static int 1755 gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, 1756 u32 offset, u32 len, 1757 unsigned flags) 1758 { 1759 struct drm_i915_private *dev_priv = ring->dev->dev_private; 1760 bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && 1761 !(flags & I915_DISPATCH_SECURE); 1762 int ret; 1763 1764 ret = intel_ring_begin(ring, 4); 1765 if (ret) 1766 return ret; 1767 1768 /* FIXME(BDW): Address space and security selectors. */ 1769 intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); 1770 intel_ring_emit(ring, offset); 1771 intel_ring_emit(ring, 0); 1772 intel_ring_emit(ring, MI_NOOP); 1773 intel_ring_advance(ring); 1774 1775 return 0; 1776 } 1777 1778 static int 1779 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, 1780 u32 offset, u32 len, 1781 unsigned flags) 1782 { 1783 int ret; 1784 1785 ret = intel_ring_begin(ring, 2); 1786 if (ret) 1787 return ret; 1788 1789 intel_ring_emit(ring, 1790 MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | 1791 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); 1792 /* bit0-7 is the length on GEN6+ */ 1793 intel_ring_emit(ring, offset); 1794 intel_ring_advance(ring); 1795 1796 return 0; 1797 } 1798 1799 static int 1800 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, 1801 u32 offset, u32 len, 1802 unsigned flags) 1803 { 1804 int ret; 1805 1806 ret = intel_ring_begin(ring, 2); 1807 if (ret) 1808 return ret; 1809 1810 intel_ring_emit(ring, 1811 MI_BATCH_BUFFER_START | 1812 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); 1813 /* bit0-7 is the length on GEN6+ */ 1814 intel_ring_emit(ring, offset); 1815 intel_ring_advance(ring); 1816 1817 return 0; 1818 } 1819 1820 /* Blitter support (SandyBridge+) */ 1821 1822 static int gen6_ring_flush(struct intel_ring_buffer *ring, 1823 u32 invalidate, u32 flush) 1824 { 1825 struct drm_device *dev = ring->dev; 1826 uint32_t cmd; 1827 int ret; 1828 1829 ret = intel_ring_begin(ring, 4); 1830 if (ret) 1831 return ret; 1832 1833 cmd = MI_FLUSH_DW; 1834 if (INTEL_INFO(ring->dev)->gen >= 8) 1835 cmd += 1; 1836 /* 1837 * Bspec vol 1c.3 - blitter engine command streamer: 1838 * "If ENABLED, all TLBs will be invalidated once the flush 1839 * operation is complete. This bit is only valid when the 1840 * Post-Sync Operation field is a value of 1h or 3h." 1841 */ 1842 if (invalidate & I915_GEM_DOMAIN_RENDER) 1843 cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | 1844 MI_FLUSH_DW_OP_STOREDW; 1845 intel_ring_emit(ring, cmd); 1846 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); 1847 if (INTEL_INFO(ring->dev)->gen >= 8) { 1848 intel_ring_emit(ring, 0); /* upper addr */ 1849 intel_ring_emit(ring, 0); /* value */ 1850 } else { 1851 intel_ring_emit(ring, 0); 1852 intel_ring_emit(ring, MI_NOOP); 1853 } 1854 intel_ring_advance(ring); 1855 1856 if (IS_GEN7(dev) && !invalidate && flush) 1857 return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); 1858 1859 return 0; 1860 } 1861 1862 int intel_init_render_ring_buffer(struct drm_device *dev) 1863 { 1864 drm_i915_private_t *dev_priv = dev->dev_private; 1865 struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; 1866 1867 ring->name = "render ring"; 1868 ring->id = RCS; 1869 ring->mmio_base = RENDER_RING_BASE; 1870 1871 if (INTEL_INFO(dev)->gen >= 6) { 1872 ring->add_request = gen6_add_request; 1873 ring->flush = gen7_render_ring_flush; 1874 if (INTEL_INFO(dev)->gen == 6) 1875 ring->flush = gen6_render_ring_flush; 1876 if (INTEL_INFO(dev)->gen >= 8) { 1877 ring->flush = gen8_render_ring_flush; 1878 ring->irq_get = gen8_ring_get_irq; 1879 ring->irq_put = gen8_ring_put_irq; 1880 } else { 1881 ring->irq_get = gen6_ring_get_irq; 1882 ring->irq_put = gen6_ring_put_irq; 1883 } 1884 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; 1885 ring->get_seqno = gen6_ring_get_seqno; 1886 ring->set_seqno = ring_set_seqno; 1887 ring->sync_to = gen6_ring_sync; 1888 ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_INVALID; 1889 ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV; 1890 ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB; 1891 ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE; 1892 ring->signal_mbox[RCS] = GEN6_NOSYNC; 1893 ring->signal_mbox[VCS] = GEN6_VRSYNC; 1894 ring->signal_mbox[BCS] = GEN6_BRSYNC; 1895 ring->signal_mbox[VECS] = GEN6_VERSYNC; 1896 } else if (IS_GEN5(dev)) { 1897 ring->add_request = pc_render_add_request; 1898 ring->flush = gen4_render_ring_flush; 1899 ring->get_seqno = pc_render_get_seqno; 1900 ring->set_seqno = pc_render_set_seqno; 1901 ring->irq_get = gen5_ring_get_irq; 1902 ring->irq_put = gen5_ring_put_irq; 1903 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT | 1904 GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; 1905 } else { 1906 ring->add_request = i9xx_add_request; 1907 if (INTEL_INFO(dev)->gen < 4) 1908 ring->flush = gen2_render_ring_flush; 1909 else 1910 ring->flush = gen4_render_ring_flush; 1911 ring->get_seqno = ring_get_seqno; 1912 ring->set_seqno = ring_set_seqno; 1913 if (IS_GEN2(dev)) { 1914 ring->irq_get = i8xx_ring_get_irq; 1915 ring->irq_put = i8xx_ring_put_irq; 1916 } else { 1917 ring->irq_get = i9xx_ring_get_irq; 1918 ring->irq_put = i9xx_ring_put_irq; 1919 } 1920 ring->irq_enable_mask = I915_USER_INTERRUPT; 1921 } 1922 ring->write_tail = ring_write_tail; 1923 if (IS_HASWELL(dev)) 1924 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; 1925 else if (IS_GEN8(dev)) 1926 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; 1927 else if (INTEL_INFO(dev)->gen >= 6) 1928 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 1929 else if (INTEL_INFO(dev)->gen >= 4) 1930 ring->dispatch_execbuffer = i965_dispatch_execbuffer; 1931 else if (IS_I830(dev) || IS_845G(dev)) 1932 ring->dispatch_execbuffer = i830_dispatch_execbuffer; 1933 else 1934 ring->dispatch_execbuffer = i915_dispatch_execbuffer; 1935 ring->init = init_render_ring; 1936 ring->cleanup = render_ring_cleanup; 1937 1938 /* Workaround batchbuffer to combat CS tlb bug. */ 1939 if (HAS_BROKEN_CS_TLB(dev)) { 1940 struct drm_i915_gem_object *obj; 1941 int ret; 1942 1943 obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); 1944 if (obj == NULL) { 1945 DRM_ERROR("Failed to allocate batch bo\n"); 1946 return -ENOMEM; 1947 } 1948 1949 ret = i915_gem_obj_ggtt_pin(obj, 0, true, false); 1950 if (ret != 0) { 1951 drm_gem_object_unreference(&obj->base); 1952 DRM_ERROR("Failed to ping batch bo\n"); 1953 return ret; 1954 } 1955 1956 ring->scratch.obj = obj; 1957 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); 1958 } 1959 1960 return intel_init_ring_buffer(dev, ring); 1961 } 1962 1963 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) 1964 { 1965 drm_i915_private_t *dev_priv = dev->dev_private; 1966 struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; 1967 int ret; 1968 1969 ring->name = "render ring"; 1970 ring->id = RCS; 1971 ring->mmio_base = RENDER_RING_BASE; 1972 1973 if (INTEL_INFO(dev)->gen >= 6) { 1974 /* non-kms not supported on gen6+ */ 1975 return -ENODEV; 1976 } 1977 1978 /* Note: gem is not supported on gen5/ilk without kms (the corresponding 1979 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up 1980 * the special gen5 functions. */ 1981 ring->add_request = i9xx_add_request; 1982 if (INTEL_INFO(dev)->gen < 4) 1983 ring->flush = gen2_render_ring_flush; 1984 else 1985 ring->flush = gen4_render_ring_flush; 1986 ring->get_seqno = ring_get_seqno; 1987 ring->set_seqno = ring_set_seqno; 1988 if (IS_GEN2(dev)) { 1989 ring->irq_get = i8xx_ring_get_irq; 1990 ring->irq_put = i8xx_ring_put_irq; 1991 } else { 1992 ring->irq_get = i9xx_ring_get_irq; 1993 ring->irq_put = i9xx_ring_put_irq; 1994 } 1995 ring->irq_enable_mask = I915_USER_INTERRUPT; 1996 ring->write_tail = ring_write_tail; 1997 if (INTEL_INFO(dev)->gen >= 4) 1998 ring->dispatch_execbuffer = i965_dispatch_execbuffer; 1999 else if (IS_I830(dev) || IS_845G(dev)) 2000 ring->dispatch_execbuffer = i830_dispatch_execbuffer; 2001 else 2002 ring->dispatch_execbuffer = i915_dispatch_execbuffer; 2003 ring->init = init_render_ring; 2004 ring->cleanup = render_ring_cleanup; 2005 2006 ring->dev = dev; 2007 INIT_LIST_HEAD(&ring->active_list); 2008 INIT_LIST_HEAD(&ring->request_list); 2009 2010 ring->size = size; 2011 ring->effective_size = ring->size; 2012 if (IS_I830(ring->dev) || IS_845G(ring->dev)) 2013 ring->effective_size -= 128; 2014 2015 ring->virtual_start = ioremap_wc(start, size); 2016 if (ring->virtual_start == NULL) { 2017 DRM_ERROR("can not ioremap virtual address for" 2018 " ring buffer\n"); 2019 return -ENOMEM; 2020 } 2021 2022 if (!I915_NEED_GFX_HWS(dev)) { 2023 ret = init_phys_status_page(ring); 2024 if (ret) 2025 return ret; 2026 } 2027 2028 return 0; 2029 } 2030 2031 int intel_init_bsd_ring_buffer(struct drm_device *dev) 2032 { 2033 drm_i915_private_t *dev_priv = dev->dev_private; 2034 struct intel_ring_buffer *ring = &dev_priv->ring[VCS]; 2035 2036 ring->name = "bsd ring"; 2037 ring->id = VCS; 2038 2039 ring->write_tail = ring_write_tail; 2040 if (INTEL_INFO(dev)->gen >= 6) { 2041 ring->mmio_base = GEN6_BSD_RING_BASE; 2042 /* gen6 bsd needs a special wa for tail updates */ 2043 if (IS_GEN6(dev)) 2044 ring->write_tail = gen6_bsd_ring_write_tail; 2045 ring->flush = gen6_bsd_ring_flush; 2046 ring->add_request = gen6_add_request; 2047 ring->get_seqno = gen6_ring_get_seqno; 2048 ring->set_seqno = ring_set_seqno; 2049 if (INTEL_INFO(dev)->gen >= 8) { 2050 ring->irq_enable_mask = 2051 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; 2052 ring->irq_get = gen8_ring_get_irq; 2053 ring->irq_put = gen8_ring_put_irq; 2054 ring->dispatch_execbuffer = 2055 gen8_ring_dispatch_execbuffer; 2056 } else { 2057 ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; 2058 ring->irq_get = gen6_ring_get_irq; 2059 ring->irq_put = gen6_ring_put_irq; 2060 ring->dispatch_execbuffer = 2061 gen6_ring_dispatch_execbuffer; 2062 } 2063 ring->sync_to = gen6_ring_sync; 2064 ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR; 2065 ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID; 2066 ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VB; 2067 ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_VVE; 2068 ring->signal_mbox[RCS] = GEN6_RVSYNC; 2069 ring->signal_mbox[VCS] = GEN6_NOSYNC; 2070 ring->signal_mbox[BCS] = GEN6_BVSYNC; 2071 ring->signal_mbox[VECS] = GEN6_VEVSYNC; 2072 } else { 2073 ring->mmio_base = BSD_RING_BASE; 2074 ring->flush = bsd_ring_flush; 2075 ring->add_request = i9xx_add_request; 2076 ring->get_seqno = ring_get_seqno; 2077 ring->set_seqno = ring_set_seqno; 2078 if (IS_GEN5(dev)) { 2079 ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT; 2080 ring->irq_get = gen5_ring_get_irq; 2081 ring->irq_put = gen5_ring_put_irq; 2082 } else { 2083 ring->irq_enable_mask = I915_BSD_USER_INTERRUPT; 2084 ring->irq_get = i9xx_ring_get_irq; 2085 ring->irq_put = i9xx_ring_put_irq; 2086 } 2087 ring->dispatch_execbuffer = i965_dispatch_execbuffer; 2088 } 2089 ring->init = init_ring_common; 2090 2091 return intel_init_ring_buffer(dev, ring); 2092 } 2093 2094 int intel_init_blt_ring_buffer(struct drm_device *dev) 2095 { 2096 drm_i915_private_t *dev_priv = dev->dev_private; 2097 struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; 2098 2099 ring->name = "blitter ring"; 2100 ring->id = BCS; 2101 2102 ring->mmio_base = BLT_RING_BASE; 2103 ring->write_tail = ring_write_tail; 2104 ring->flush = gen6_ring_flush; 2105 ring->add_request = gen6_add_request; 2106 ring->get_seqno = gen6_ring_get_seqno; 2107 ring->set_seqno = ring_set_seqno; 2108 if (INTEL_INFO(dev)->gen >= 8) { 2109 ring->irq_enable_mask = 2110 GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; 2111 ring->irq_get = gen8_ring_get_irq; 2112 ring->irq_put = gen8_ring_put_irq; 2113 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; 2114 } else { 2115 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; 2116 ring->irq_get = gen6_ring_get_irq; 2117 ring->irq_put = gen6_ring_put_irq; 2118 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 2119 } 2120 ring->sync_to = gen6_ring_sync; 2121 ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR; 2122 ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV; 2123 ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_INVALID; 2124 ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_BVE; 2125 ring->signal_mbox[RCS] = GEN6_RBSYNC; 2126 ring->signal_mbox[VCS] = GEN6_VBSYNC; 2127 ring->signal_mbox[BCS] = GEN6_NOSYNC; 2128 ring->signal_mbox[VECS] = GEN6_VEBSYNC; 2129 ring->init = init_ring_common; 2130 2131 return intel_init_ring_buffer(dev, ring); 2132 } 2133 2134 int intel_init_vebox_ring_buffer(struct drm_device *dev) 2135 { 2136 drm_i915_private_t *dev_priv = dev->dev_private; 2137 struct intel_ring_buffer *ring = &dev_priv->ring[VECS]; 2138 2139 ring->name = "video enhancement ring"; 2140 ring->id = VECS; 2141 2142 ring->mmio_base = VEBOX_RING_BASE; 2143 ring->write_tail = ring_write_tail; 2144 ring->flush = gen6_ring_flush; 2145 ring->add_request = gen6_add_request; 2146 ring->get_seqno = gen6_ring_get_seqno; 2147 ring->set_seqno = ring_set_seqno; 2148 2149 if (INTEL_INFO(dev)->gen >= 8) { 2150 ring->irq_enable_mask = 2151 GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; 2152 ring->irq_get = gen8_ring_get_irq; 2153 ring->irq_put = gen8_ring_put_irq; 2154 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; 2155 } else { 2156 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; 2157 ring->irq_get = hsw_vebox_get_irq; 2158 ring->irq_put = hsw_vebox_put_irq; 2159 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 2160 } 2161 ring->sync_to = gen6_ring_sync; 2162 ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER; 2163 ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV; 2164 ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VEB; 2165 ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_INVALID; 2166 ring->signal_mbox[RCS] = GEN6_RVESYNC; 2167 ring->signal_mbox[VCS] = GEN6_VVESYNC; 2168 ring->signal_mbox[BCS] = GEN6_BVESYNC; 2169 ring->signal_mbox[VECS] = GEN6_NOSYNC; 2170 ring->init = init_ring_common; 2171 2172 return intel_init_ring_buffer(dev, ring); 2173 } 2174 2175 int 2176 intel_ring_flush_all_caches(struct intel_ring_buffer *ring) 2177 { 2178 int ret; 2179 2180 if (!ring->gpu_caches_dirty) 2181 return 0; 2182 2183 ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); 2184 if (ret) 2185 return ret; 2186 2187 trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); 2188 2189 ring->gpu_caches_dirty = false; 2190 return 0; 2191 } 2192 2193 int 2194 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) 2195 { 2196 uint32_t flush_domains; 2197 int ret; 2198 2199 flush_domains = 0; 2200 if (ring->gpu_caches_dirty) 2201 flush_domains = I915_GEM_GPU_DOMAINS; 2202 2203 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); 2204 if (ret) 2205 return ret; 2206 2207 trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); 2208 2209 ring->gpu_caches_dirty = false; 2210 return 0; 2211 } 2212