/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <sys/sched.h>

/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
struct pipe_control {
	struct drm_i915_gem_object *obj;
	volatile u32 *cpu_page;
	u32 gtt_offset;
};

static inline int ring_space(struct intel_ring_buffer *ring)
{
	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += ring->size;
	return space;
}

static int
gen2_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains,
		       u32 flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
	flags |= PIPE_CONTROL_TLB_INVALIDATE;
	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed (but only if the caller actually wants that).
	 */
	if (flush_domains)
		flags |= PIPE_CONTROL_CS_STALL;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0); /* lower dword */
	intel_ring_emit(ring, 0); /* upper dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static void ring_write_tail(struct intel_ring_buffer *ring,
			    uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
			     RING_ACTHD(ring->mmio_base) : ACTHD;

	return I915_READ(acthd_reg);
}

static int init_ring_common(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = ring->obj;
	int ret = 0;
	uint32_t head;

	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_get(dev_priv);

	/* Stop the ring if it's running. */
	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	/* Initialize the ring. */
	I915_WRITE_START(ring, obj->gtt_offset);
	head = I915_READ_HEAD(ring) & HEAD_ADDR;

	/* G45 ring initialization fails to reset head to zero */
	if (head != 0) {
		DRM_DEBUG("%s head not reset to zero "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));

		I915_WRITE_HEAD(ring, 0);

		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
		}
	}

	I915_WRITE_CTL(ring,
			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (_intel_wait_for(ring->dev,
	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
	     I915_READ_START(ring) == obj->gtt_offset &&
	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
	    50, 1, "915rii")) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x head %08x tail %08x start %08x\n",
			  ring->name,
			  I915_READ_CTL(ring),
			  I915_READ_HEAD(ring),
			  I915_READ_TAIL(ring),
			  I915_READ_START(ring));
		ret = -EIO;
		goto out;
	}

	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
		i915_kernel_lost_context(ring->dev);
	else {
		ring->head = I915_READ_HEAD(ring);
		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
		ring->space = ring_space(ring);
		ring->last_retired_head = -1;
	}

out:
	if (HAS_FORCE_WAKE(dev))
		gen6_gt_force_wake_put(dev_priv);

	return ret;
}

static int
init_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc;
	struct drm_i915_gem_object *obj;
	int ret;

	if (ring->private)
		return 0;

	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
	if (!pc)
		return -ENOMEM;

	obj = i915_gem_alloc_object(ring->dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret)
		goto err_unref;

	pc->gtt_offset = obj->gtt_offset;
	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
	if (pc->cpu_page == NULL)
		goto err_unpin;
	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);

	pc->obj = obj;
	ring->private = pc;
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	drm_free(pc, DRM_I915_GEM);
	return ret;
}

static void
cleanup_pipe_control(struct intel_ring_buffer *ring)
{
	struct pipe_control *pc = ring->private;
	struct drm_i915_gem_object *obj;

	if (!ring->private)
		return;

	obj = pc->obj;
	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);

	drm_free(pc, DRM_I915_GEM);
	ring->private = NULL;
}
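
/*
 * One-time setup for the render ring: runs the common ring init, then
 * applies the generation-specific mode-register and workaround writes
 * documented inline below (pipe-control scratch page on gen5+, TLB
 * invalidate / replay-mode bits on gen6/gen7, L3 parity unmasking).
 */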
static int init_render_ring(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);

	if (INTEL_INFO(dev)->gen > 3)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 */
	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));

	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));

		/* This is not explicitly set for GEN6, so read the register.
		 * see intel_ring_mi_set_context() for why we care.
		 * TODO: consider explicitly setting the bit for GEN5
		 */
		ring->itlb_before_ctx_switch =
			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_GPU_CACHE(dev))
		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);

	return ret;
}

static void render_ring_cleanup(struct intel_ring_buffer *ring)
{
	if (!ring->private)
		return;

	cleanup_pipe_control(ring);
}

static void
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
}

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_ring_buffer *ring,
		 u32 *seqno)
{
	u32 mbox1_reg;
	u32 mbox2_reg;
	int ret;

	ret = intel_ring_begin(ring, 10);
	if (ret)
		return ret;

	mbox1_reg = ring->signal_mbox[0];
	mbox2_reg = ring->signal_mbox[1];

	*seqno = i915_gem_next_request_seqno(ring);

	update_mboxes(ring, *seqno, mbox1_reg);
	update_mboxes(ring, *seqno, mbox2_reg);
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, *seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}

/**
 * gen6_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */
static int
gen6_ring_sync(struct intel_ring_buffer *waiter,
	       struct intel_ring_buffer *signaller,
	       u32 seqno)
{
	int ret;
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(signaller->semaphore_register[waiter->id] ==
		MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter,
			dw1 | signaller->semaphore_register[waiter->id]);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter, 0);
	intel_ring_emit(waiter, MI_NOOP);
	intel_ring_advance(waiter);

	return 0;
}

int render_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);
int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
    struct intel_ring_buffer *signaller, u32 seqno);

#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {										\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |	\
		 PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);		\
	intel_ring_emit(ring__, 0);						\
	intel_ring_emit(ring__, 0);						\
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
		      uint32_t *result)
{
	u32 seqno = i915_gem_next_request_seqno(ring);
	struct pipe_control *pc = ring->private;
	u32 scratch_addr = pc->gtt_offset + 128;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 128;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency)
		intel_ring_get_active_head(ring);
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
	struct pipe_control *pc = ring->private;
	return pc->cpu_page[0];
}

static bool
gen5_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen5_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i9xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}
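
/*
 * Counterpart to i9xx_ring_get_irq: once the last waiter drops its
 * reference, mask the ring's user-interrupt bit in IMR again.
 */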
static void
i9xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

static bool
i8xx_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
}

void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);
}

static int
bsd_ring_flush(struct intel_ring_buffer *ring,
	       uint32_t invalidate_domains,
	       uint32_t flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_ring_buffer *ring,
		 u32 *result)
{
	uint32_t seqno;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	seqno = i915_gem_next_request_seqno(ring);

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	*result = seqno;
	return 0;
}

static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (!dev->irq_enabled)
		return false;

	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb.
	 */
	gen6_gt_force_wake_get(dev_priv);

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
						GEN6_RENDER_L3_PARITY_ERROR));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	return true;
}

static void
gen6_ring_put_irq(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
		else
			I915_WRITE_IMR(ring, ~0);
		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
		POSTING_READ(GTIMR);
	}
	lockmgr(&dev_priv->irq_lock, LK_RELEASE);

	gen6_gt_force_wake_put(dev_priv);
}

static int
i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_NON_SECURE_I965);
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER);
	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
	intel_ring_emit(ring, offset + len - 8);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}
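
/*
 * Legacy batch dispatch used below gen4 (except i830/845G, which use
 * i830_dispatch_execbuffer above): a single MI_BATCH_BUFFER_START with
 * GTT addressing.  The local "flags" is fixed at zero here, so the
 * batch is always emitted with MI_BATCH_NON_SECURE set.
 */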
static int
i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
			 u32 offset, u32 len)
{
	int ret;
	unsigned flags = 0;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ?
					0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
	    PAGE_SIZE);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_ring_buffer *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate status page\n");
		ret = -ENOMEM;
		goto err;
	}

	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);

	ret = i915_gem_object_pin(obj, 4096, true);
	if (ret != 0) {
		goto err_unref;
	}

	ring->status_page.gfx_addr = obj->gtt_offset;
	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
	    PAGE_SIZE, PAGE_SIZE);
	if (ring->status_page.page_addr == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}
	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
	    1);
	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
	ring->status_page.obj = obj;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	intel_ring_setup_status_page(ring);
	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
		  ring->name, ring->status_page.gfx_addr);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
err:
	return ret;
}

static int init_phys_hws_pga(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

static inline void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
{
	return pmap_mapdev_attr(phys_addr, size, VM_MEMATTR_WRITE_COMBINING);
}
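
/*
 * Common ring setup: allocates the status page (or the legacy physical
 * HWS page), allocates and pins the ring object, maps it write-combined,
 * runs the per-ring init hook and applies the i830/845G tail-pointer
 * erratum workaround by shrinking the effective size.
 */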
static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ring->size = 32 * PAGE_SIZE;
	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
					 ring->size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

err_unmap:
	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	cleanup_status_page(ring);
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv;
	int ret;

	if (ring->obj == NULL)
		return;

	/* Disable the ring buffer. The ring must be idle at this point */
	dev_priv = ring->dev->dev_private;
	ret = intel_ring_idle(ring);
	if (ret)
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	I915_WRITE_CTL(ring, 0);

	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);

	i915_gem_object_unpin(ring->obj);
	drm_gem_object_unreference(&ring->obj->base);
	ring->obj = NULL;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);
}

static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret;

	ret = i915_wait_seqno(ring, seqno);
	if (!ret)
		i915_gem_retire_requests_ring(ring);

	return ret;
}

static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		if (request->tail == -1)
			continue;

		space = request->tail - (ring->tail + 8);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	if (ring->last_retired_head == -1)
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (ring->space < n)
		return -ENOSPC;

	return 0;
}

static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = ticks + 60 * hz;

	do {
		ring->head = I915_READ_HEAD(ring);
		ring->space = ring_space(ring);
		if (ring->space >= n) {
			return 0;
		}

#if 0
		if (dev->primary->master) {
			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
		}
#else
		if (dev_priv->sarea_priv)
			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
#endif

		DELAY(1000);

		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
		if (ret)
			return ret;
	} while (!time_after(ticks, end));
	return -EBUSY;
}

static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
	uint32_t __iomem *virt;
	int rem = ring->size - ring->tail;

	if (ring->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ring->tail = 0;
	ring->space = ring_space(ring);

	return 0;
}

int intel_ring_idle(struct intel_ring_buffer *ring)
{
	return ring_wait_for_space(ring, ring->size - 8);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int n = 4*num_dwords;
	int ret;

	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	if (unlikely(ring->tail + n > ring->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ring->space < n)) {
		ret = ring_wait_for_space(ring, n);
		if (unlikely(ret))
			return ret;
	}

	ring->space -= n;
	return 0;
}

void intel_ring_advance(struct intel_ring_buffer *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	ring->tail &= ring->size - 1;
	if (dev_priv->stop_rings & intel_ring_flag(ring))
		return;
	ring->write_tail(ring, ring->tail);
}
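
/*
 * Illustrative emit sequence (not a function in this file): callers
 * reserve space, emit their dwords (padded with MI_NOOP to keep the
 * count even) and then advance the tail, e.g.
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_FLUSH);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 *
 * which is the pattern used by the flush and add_request helpers above.
 */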
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
			      uint32_t offset, uint32_t len)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int blt_ring_flush(struct intel_ring_buffer *ring,
			  uint32_t invalidate, uint32_t flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
		ring->signal_mbox[0] = GEN6_VRSYNC;
		ring->signal_mbox[1] = GEN6_BRSYNC;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	if (!I915_NEED_GFX_HWS(dev)) {
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		return -ENODEV;
	}

	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
	 * the special gen5 functions.
	 */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	if (!I915_NEED_GFX_HWS(dev))
		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev))
		ring->effective_size -= 128;

	ring->virtual_start = ioremap_wc(start, size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	return 0;
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (IS_GEN6(dev) || IS_GEN7(dev)) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		ring->sync_to = gen6_ring_sync;
		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
		ring->signal_mbox[0] = GEN6_RVSYNC;
		ring->signal_mbox[1] = GEN6_BVSYNC;
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = blt_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
	ring->irq_get = gen6_ring_get_irq;
	ring->irq_put = gen6_ring_put_irq;
	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	ring->sync_to = gen6_ring_sync;
	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
	ring->signal_mbox[0] = GEN6_RBSYNC;
	ring->signal_mbox[1] = GEN6_VBSYNC;
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	ring->gpu_caches_dirty = false;
	return 0;
}