/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

void
i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
{

	if (ring->trace_irq_seqno == 0) {
		lockmgr(&ring->irq_lock, LK_EXCLUSIVE);
		if (ring->irq_get(ring))
			ring->trace_irq_seqno = seqno;
		lockmgr(&ring->irq_lock, LK_RELEASE);
	}
}

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
	if (space < 0)
		space += ring->size;
	return space;
}
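
/*
 * Worked example for ring_space() above (hypothetical numbers): with
 * size = 0x2000, head = 0x100 and tail = 0x1f00, the raw difference
 * 0x100 - (0x1f00 + 8) is negative, so the ring size is added back and
 * 0x1f8 bytes are reported free. The 8 bytes reserved past the tail
 * keep the tail from ever catching up to the head exactly, so a full
 * ring never looks identical to an empty one.
 */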

static int
render_ring_flush(struct intel_ring_buffer *ring,
		  uint32_t invalidate_domains,
		  uint32_t flush_domains)
{
	struct drm_device *dev = ring->dev;
	uint32_t cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (INTEL_INFO(dev)->gen < 4) {
		/*
		 * On the 965, the sampler cache always gets flushed
		 * and this bit is reserved.
		 */
		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
			cmd |= MI_READ_FLUSH;
	}
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	intel_emit_post_sync_nonzero_flush(ring);

	/* Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
	    RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	uint32_t head;

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	/* Initialize the ring. */
	I915_WRITE_START(ring, obj->gtt_offset);
	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG("%s head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (_intel_wait_for(ring->dev,
	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
	     I915_READ_START(ring) == obj->gtt_offset &&
	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
	    50, 1, "915rii")) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		return -EIO;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
	}

	return 0;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}

static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3) {
		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
		I915_WRITE(MI_MODE, mode);
		if (IS_GEN7(dev))
			I915_WRITE(GFX_MODE_GEN7,
				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
	}

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
	}

	if (INTEL_INFO(dev)->gen >= 6) {
		I915_WRITE(INSTPM,
			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
	}

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (!ring->private)
		return;

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			MI_SEMAPHORE_GLOBAL_GTT |
			MI_SEMAPHORE_REGISTER |
			MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring,
		 u32 *seqno)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	*seqno = i915_gem_next_request_seqno(ring);

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, *seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}
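
/*
 * Space accounting sketch for gen6_add_request() above: each
 * update_mboxes() call emits 3 dwords (semaphore command, seqno,
 * mailbox register), so the two mailbox updates plus the
 * MI_STORE_DWORD_INDEX / index / seqno / MI_USER_INTERRUPT sequence
 * account for the 10 dwords reserved by intel_ring_begin(ring, 10).
 */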

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
		struct intel_ring_buffer *signaller,
		int ring,
		u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

int render_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);

/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
		    struct intel_ring_buffer *signaller,
		    u32 seqno)
{
	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("valid RCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       RCS,
			       seqno);
}

/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid VCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       VCS,
			       seqno);
}

/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
		      struct intel_ring_buffer *signaller,
		      u32 seqno)
{
	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
	    ("Valid BCS semaphore"));
	return intel_ring_sync(waiter,
			       signaller,
			       BCS,
			       seqno);
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static int
render_ring_add_request(struct intel_ring_buffer *ring,
			uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}
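
/*
 * Seqno plumbing note: the add_request implementations above write the
 * new seqno with MI_STORE_DWORD_INDEX into the I915_GEM_HWS_INDEX slot
 * of the hardware status page (pc_render_add_request instead writes it
 * through a PIPE_CONTROL into the pipe_control scratch page), and the
 * get_seqno callbacks below read the same location back via
 * intel_read_status_page() or pc->cpu_page[0].
 */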

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;

	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (/* IS_GEN6(dev) || */IS_GEN7(dev))
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static uint32_t
ring_get_seqno(struct intel_ring_buffer *ring)
{
	if (ring->status_page.page_addr == NULL)
		return (-1);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static uint32_t
pc_render_get_seqno(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	if (pc != NULL)
		return pc->cpu_page[0];
	else
		return (-1);
}

static void
ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask &= ~mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->gt_irq_mask |= mask;
	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
	POSTING_READ(GTIMR);
}

static void
i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask &= ~mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static void
i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
{
	dev_priv->irq_mask |= mask;
	I915_WRITE(IMR, dev_priv->irq_mask);
	POSTING_READ(IMR);
}

static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_enable_irq(dev_priv,
					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
		else
			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
	}

	return true;
}

static void
render_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (HAS_PCH_SPLIT(dev))
			ironlake_disable_irq(dev_priv,
					     GT_USER_INTERRUPT |
					     GT_PIPE_NOTIFY);
		else
			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
	}
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
ring_add_request(struct intel_ring_buffer *ring,
		 uint32_t *result)
{
	uint32_t seqno;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	seqno = i915_gem_next_request_seqno(ring);

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	gen6_gt_force_wake_get(dev_priv);

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		ring->irq_mask &= ~rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_enable_irq(dev_priv, gflag);
	}

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		ring->irq_mask |= rflag;
		I915_WRITE_IMR(ring, ring->irq_mask);
		ironlake_disable_irq(dev_priv, gflag);
	}

	gen6_gt_force_wake_put(dev_priv);
}

static bool
bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (ring->irq_refcount++ == 0) {
		if (IS_G4X(dev))
			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}

	return true;
}

static void
bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	KKASSERT(lockstatus(&ring->irq_lock, curthread) != 0);
	if (--ring->irq_refcount == 0) {
		if (IS_G4X(dev))
			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
		else
			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
	}
}

static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
    uint32_t length)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START | (2 << 6) |
			MI_BATCH_NON_SECURE_I965);
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
				uint32_t offset, uint32_t len)
{
	struct drm_device *dev = ring->dev;
	int ret;

	if (IS_I830(dev) || IS_845G(dev)) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, MI_BATCH_BUFFER);
		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		intel_ring_emit(ring, offset + len - 8);
		intel_ring_emit(ring, 0);
	} else {
		ret = intel_ring_begin(ring, 2);
		if (ret)
			return ret;

		if (INTEL_INFO(dev)->gen >= 4) {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6) |
					MI_BATCH_NON_SECURE_I965);
			intel_ring_emit(ring, offset);
		} else {
			intel_ring_emit(ring,
					MI_BATCH_BUFFER_START | (2 << 6));
			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
		}
	}
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;

	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
	    ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static
int intel_init_ring_buffer(struct drm_device *dev,
			   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	lockinit(&ring->irq_lock, "ringb", 0, LK_CANRECURSE);
	ring->irq_mask = ~0;

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ring->map.size = ring->size;
	ring->map.offset = dev->agp->base + obj->gtt_offset;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ring->virtual_start = ring->map.virtual;
	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	drm_core_ioremapfree(&ring->map, dev);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_wait_ring_idle(ring);
	I915_WRITE_CTL(ring, 0);

	drm_core_ioremapfree(&ring->map, ring->dev);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	unsigned int *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = intel_wait_ring_buffer(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 8;
	while (rem--) {
		*virt++ = MI_NOOP;
		*virt++ = MI_NOOP;
	}

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	bool was_interruptible;
	int ret;

	/* XXX As we have not yet audited all the paths to check that
	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
	 * allow us to be interruptible by a signal.
	 */
	was_interruptible = dev_priv->mm.interruptible;
	dev_priv->mm.interruptible = false;

	ret = i915_wait_request(ring, seqno, true);

	dev_priv->mm.interruptible = was_interruptible;

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (ring->last_retired_head == -1)
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (ring->space < n)
		return -ENOSPC;

	return 0;
}

int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	if (drm_core_check_feature(dev, DRIVER_GEM))
		/* With GEM the hangcheck timer should kick us out of the loop,
		 * leaving it early runs the risk of corrupting GEM state (due
		 * to running on almost untested codepaths). But on resume
		 * timers don't work yet, so prevent a complete hang in that
		 * case by choosing an insanely large timeout. */
		end = ticks + hz * 60;
	else
		end = ticks + hz * 3;
	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);
		if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
			return -EAGAIN;
		}
	} while (!time_after(ticks, end));
	return -EBUSY;
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	if (atomic_load_acq_int(&dev_priv->mm.wedged))
		return -EIO;

	if (ring->tail + n > ring->effective_size) {
		ret = intel_wrap_ring_buffer(ring);
		if (ret != 0)
			return ret;
	}

	if (ring->space < n) {
		ret = intel_wait_ring_buffer(ring, n);
		if (ret != 0)
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	ring->tail &= ring->size - 1;
	ring->write_tail(ring, ring->tail);
}
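
/*
 * For reference, every command emitter in this file follows the same
 * begin/emit/advance pattern; an illustrative sketch (mirroring
 * bsd_ring_flush() above), not a new API:
 *
 *	ret = intel_ring_begin(ring, 2);	reserve space for 2 dwords
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);	write each dword
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);		publish the new tail
 *
 * intel_ring_begin() wraps the buffer and waits for free space as
 * needed, and intel_ring_advance() masks the tail and hands it to the
 * ring's write_tail() hook.
 */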

static const struct intel_ring_buffer render_ring = {
	.name = "render ring",
	.id = RCS,
	.mmio_base = RENDER_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_render_ring,
	.write_tail = ring_write_tail,
	.flush = render_ring_flush,
	.add_request = render_ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = render_ring_get_irq,
	.irq_put = render_ring_put_irq,
	.dispatch_execbuffer = render_ring_dispatch_execbuffer,
	.cleanup = render_ring_cleanup,
	.sync_to = render_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_RV,
			       MI_SEMAPHORE_SYNC_RB},
	.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
};

/* ring buffer for bit-stream decoder */

static const struct intel_ring_buffer bsd_ring = {
	.name = "bsd ring",
	.id = VCS,
	.mmio_base = BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = bsd_ring_flush,
	.add_request = ring_add_request,
	.get_seqno = ring_get_seqno,
	.irq_get = bsd_ring_get_irq,
	.irq_put = bsd_ring_put_irq,
	.dispatch_execbuffer = ring_dispatch_execbuffer,
};

static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
	I915_WRITE(GEN6_BSD_RNCID, 0x0);

	if (_intel_wait_for(ring->dev,
	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
	     GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
	    true, "915g6i") != 0)
		DRM_ERROR("timed out waiting for IDLE Indicator\n");

	I915_WRITE_TAIL(ring, value);
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      uint32_t offset, uint32_t len)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static void
gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_USER_INTERRUPT,
				 GEN6_RENDER_USER_INTERRUPT);
}

static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_put_irq(ring,
				 GT_GEN6_BSD_USER_INTERRUPT,
				 GEN6_BSD_USER_INTERRUPT);
}

/* ring buffer for Video Codec for Gen6+ */
static const struct intel_ring_buffer gen6_bsd_ring = {
	.name = "gen6 bsd ring",
	.id = VCS,
	.mmio_base = GEN6_BSD_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = gen6_bsd_ring_write_tail,
	.flush = gen6_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = gen6_bsd_ring_get_irq,
	.irq_put = gen6_bsd_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_bsd_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_VR,
			       MI_SEMAPHORE_SYNC_INVALID,
			       MI_SEMAPHORE_SYNC_VB},
	.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
};

/* Blitter support (SandyBridge+) */

static bool
blt_ring_get_irq(struct intel_ring_buffer *ring)
{
	return gen6_ring_get_irq(ring,
				 GT_BLT_USER_INTERRUPT,
				 GEN6_BLITTER_USER_INTERRUPT);
}

static void
blt_ring_put_irq(struct intel_ring_buffer *ring)
{
	gen6_ring_put_irq(ring,
			  GT_BLT_USER_INTERRUPT,
			  GEN6_BLITTER_USER_INTERRUPT);
}

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static const struct intel_ring_buffer gen6_blt_ring = {
	.name = "blt ring",
	.id = BCS,
	.mmio_base = BLT_RING_BASE,
	.size = 32 * PAGE_SIZE,
	.init = init_ring_common,
	.write_tail = ring_write_tail,
	.flush = blt_ring_flush,
	.add_request = gen6_add_request,
	.get_seqno = gen6_ring_get_seqno,
	.irq_get = blt_ring_get_irq,
	.irq_put = blt_ring_put_irq,
	.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
	.sync_to = gen6_blt_ring_sync_to,
	.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
			       MI_SEMAPHORE_SYNC_BV,
			       MI_SEMAPHORE_SYNC_INVALID},
	.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
};

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
		ring->get_seqno = gen6_ring_get_seqno;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
    uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[VCS];

	if (IS_GEN6(dev) || IS_GEN7(dev))
		*ring = gen6_bsd_ring;
	else
		*ring = bsd_ring;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[BCS];

	*ring = gen6_blt_ring;

	return intel_init_ring_buffer(dev, ring);
}