/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao <haihao.xiang@intel.com>
 *
 * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
render_ring_flush(struct intel_ring_buffer *ring,
		  uint32_t invalidate_domains,
		  uint32_t flush_domains)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) &
	    I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (INTEL_INFO(dev)->gen < 4) {
		/*
		 * On the 965, the sampler cache always gets flushed
		 * and this bit is reserved.
		 */
		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
			cmd |= MI_READ_FLUSH;
	}
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
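 *
 * intel_emit_post_sync_nonzero_flush() below therefore emits two
 * PIPE_CONTROLs: first a CS-stall with stall-at-scoreboard, then a
 * post-sync QW write to scratch space in the pipe_control page.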
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	intel_emit_post_sync_nonzero_flush(ring);

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
		RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	uint32_t head;

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	/* Initialize the ring. */
	I915_WRITE_START(ring, obj->gtt_offset);
	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG("%s head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (_intel_wait_for(ring->dev,
	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
	     I915_READ_START(ring) == obj->gtt_offset &&
	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
	    50, 1, "915rii")) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		return -EIO;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
	}

	return 0;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (!ring->private)
		return;

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
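 *
 * The request then writes the new seqno into the hardware status page and
 * emits MI_USER_INTERRUPT, matching the other add_request implementations.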
 */
static int
gen6_add_request(struct intel_ring_buffer *ring,
		 u32 *seqno)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	*seqno = i915_gem_next_request_seqno(ring);

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, *seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
		struct intel_ring_buffer *signaller,
		int ring,
		u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

int render_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);

/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
		    struct intel_ring_buffer *signaller,
		    u32 seqno)
{
	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("valid RCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       RCS,
			       seqno);
}

/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid VCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       VCS,
			       seqno);
}

/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid BCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       BCS,
			       seqno);
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {										\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |	\
		 PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);		\
	intel_ring_emit(ring__, 0);						\
	intel_ring_emit(ring__, 0);						\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static int
render_ring_add_request(struct intel_ring_buffer *ring,
			uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
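	/* The ACTHD read is wanted only for its ordering side effect; its
	 * return value is deliberately ignored. */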
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask &= ~mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask |= mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask &= ~mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static void
i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask |= mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_enable_irq(dev_priv,
					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
		else
			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
render_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_disable_irq(dev_priv,
					     GT_USER_INTERRUPT |
					     GT_PIPE_NOTIFY);
		else
			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
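	 * Gen7 therefore uses dedicated per-engine HWS_PGA registers,
	 * selected by ring->id below; gen6 and earlier derive the register
	 * from the ring's mmio base.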
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
ring_add_request(struct intel_ring_buffer *ring,
		 uint32_t *result)
{
	uint32_t seqno;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	seqno = i915_gem_next_request_seqno(ring);

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	gen6_gt_force_wake_get(dev_priv);

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		ring->irq_mask &= ~rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_enable_irq(dev_priv, gflag);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		ring->irq_mask |= rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_disable_irq(dev_priv, gflag);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	gen6_gt_force_wake_put(dev_priv);
}

static bool
bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (IS_G4X(dev))
			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (IS_G4X(dev))
			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
			 uint32_t length)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | (2 << 6) |
			MI_BATCH_NON_SECURE_I965);
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
				uint32_t offset, uint32_t len)
{
	struct drm_device *dev = ring->dev;
	int ret;

	if (IS_I830(dev) || IS_845G(dev)) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, 0);
	} else {
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		if (INTEL_INFO(dev)->gen >= 4) {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6) |
					MI_BATCH_NON_SECURE_I965);
			intel_ring_emit(ring, offset);
		} else {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6));
			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		}
	}
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
	    ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

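	/* ring->irq_mask starts fully masked; gen6_ring_get_irq() clears
	 * individual bits as interrupt references are taken. */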
	ring->irq_mask = ~0;

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ring->map.size = ring->size;
	ring->map.offset = dev->agp->base + obj->gtt_offset;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ring->virtual_start = ring->map.virtual;
	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	drm_core_ioremapfree(&ring->map, dev);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	drm_core_ioremapfree(&ring->map, ring->dev);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (ring->last_retired_head == -1)
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (ring->space < n)
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = ticks + 60 * hz;

	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);

		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
		if (ret)
			return ret;
	} while (!time_after(ticks, end));
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 4;
	while (rem--)
		*virt++ = MI_NOOP;

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	return ring_wait_for_space(ring, ring->size - 8);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	if (unlikely(ring->tail + n > ring->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	ring->tail &= ring->size - 1;
	ring->write_tail(ring, ring->tail);
}

static const struct intel_ring_buffer render_ring = {
	.name = "render ring",
	.id = RCS,
	.mmio_base = RENDER_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_render_ring,
	.write_tail = ring_write_tail,
	.flush = render_ring_flush,
	.add_request = render_ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = render_ring_get_irq,
	.irq_put = render_ring_put_irq,
	.dispatch_execbuffer = render_ring_dispatch_execbuffer,
	.cleanup = render_ring_cleanup,
	.sync_to = render_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_RV,
			       MI_SEMAPHORE_SYNC_RB},
	.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
};

/* ring buffer for bit-stream decoder */

static const struct intel_ring_buffer bsd_ring = {
	.name = "bsd ring",
	.id = VCS,
	.mmio_base = BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = bsd_ring_flush,
	.add_request = ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = bsd_ring_get_irq,
	.irq_put = bsd_ring_put_irq,
	.dispatch_execbuffer = ring_dispatch_execbuffer,
};

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      uint32_t offset, uint32_t len)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static void
gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

/* ring buffer for Video Codec for Gen6+ */
static const struct intel_ring_buffer gen6_bsd_ring = {
	.name = "gen6 bsd ring",
	.id = VCS,
	.mmio_base = GEN6_BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = gen6_bsd_ring_write_tail,
	.flush = gen6_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = gen6_bsd_ring_get_irq,
	.irq_put = gen6_bsd_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_bsd_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_VR,
			       MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_VB},
	.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
};

/* Blitter support (SandyBridge+) */

static bool
blt_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BLT_USER_INTERRUPT,
				 GEN6_BLITTER_USER_INTERRUPT);
}

static void
blt_ring_put_irq(struct intel_ring_buffer *ring)
{
	gen6_ring_put_irq(ring,
			  GT_GEN6_BLT_USER_INTERRUPT,
			  GEN6_BLITTER_USER_INTERRUPT);
}

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static const struct intel_ring_buffer gen6_blt_ring = {
	.name = "blt ring",
	.id = BCS,
	.mmio_base = BLT_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = blt_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = blt_ring_get_irq,
	.irq_put = blt_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_blt_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
			       MI_SEMAPHORE_SYNC_BV,
			       MI_SEMAPHORE_SYNC_INVALID},
	.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
};

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
		ring->get_seqno = gen6_ring_get_seqno;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
			       uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	if (IS_GEN6(dev) || IS_GEN7(dev))
		*ring = gen6_bsd_ring;
	else
		*ring = bsd_ring;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	*ring = gen6_blt_ring;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}