1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao <haihao.xiang@intel.com>
27  *
28  * $FreeBSD: head/sys/dev/drm2/i915/intel_ringbuffer.c 253709 2013-07-27 16:42:29Z kib $
29  */
30 
31 #include <drm/drmP.h>
32 #include <drm/i915_drm.h>
33 #include "i915_drv.h"
34 #include "intel_drv.h"
35 #include "intel_ringbuffer.h"
36 #include <sys/sched.h>
37 
38 /*
39  * 965+ support PIPE_CONTROL commands, which provide finer grained control
40  * over cache flushing.
41  */
42 struct pipe_control {
43 	struct drm_i915_gem_object *obj;
44 	volatile u32 *cpu_page;
45 	u32 gtt_offset;
46 };
47 
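/*
 * Free space in the ring between the software tail and the hardware head,
 * keeping I915_RING_FREE_SPACE bytes in reserve and wrapping around the end
 * of the buffer when the head has passed the tail.
 */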
48 static inline int ring_space(struct intel_ring_buffer *ring)
49 {
50 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
51 	if (space < 0)
52 		space += ring->size;
53 	return space;
54 }
55 
56 static int
57 gen2_render_ring_flush(struct intel_ring_buffer *ring,
58 		       u32	invalidate_domains,
59 		       u32	flush_domains)
60 {
61 	u32 cmd;
62 	int ret;
63 
64 	cmd = MI_FLUSH;
65 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
66 		cmd |= MI_NO_WRITE_FLUSH;
67 
68 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
69 		cmd |= MI_READ_FLUSH;
70 
71 	ret = intel_ring_begin(ring, 2);
72 	if (ret)
73 		return ret;
74 
75 	intel_ring_emit(ring, cmd);
76 	intel_ring_emit(ring, MI_NOOP);
77 	intel_ring_advance(ring);
78 
79 	return 0;
80 }
81 
82 static int
83 gen4_render_ring_flush(struct intel_ring_buffer *ring,
84 		       u32	invalidate_domains,
85 		       u32	flush_domains)
86 {
87 	struct drm_device *dev = ring->dev;
88 	u32 cmd;
89 	int ret;
90 
91 	/*
92 	 * read/write caches:
93 	 *
94 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
95 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
96 	 * also flushed at 2d versus 3d pipeline switches.
97 	 *
98 	 * read-only caches:
99 	 *
100 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
101 	 * MI_READ_FLUSH is set, and is always flushed on 965.
102 	 *
103 	 * I915_GEM_DOMAIN_COMMAND may not exist?
104 	 *
105 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
106 	 * invalidated when MI_EXE_FLUSH is set.
107 	 *
108 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
109 	 * invalidated with every MI_FLUSH.
110 	 *
111 	 * TLBs:
112 	 *
113 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
114 	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
115 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
116 	 * are flushed at any MI_FLUSH.
117 	 */
118 
119 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
120 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
121 		cmd &= ~MI_NO_WRITE_FLUSH;
122 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
123 		cmd |= MI_EXE_FLUSH;
124 
125 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
126 	    (IS_G4X(dev) || IS_GEN5(dev)))
127 		cmd |= MI_INVALIDATE_ISP;
128 
129 	ret = intel_ring_begin(ring, 2);
130 	if (ret)
131 		return ret;
132 
133 	intel_ring_emit(ring, cmd);
134 	intel_ring_emit(ring, MI_NOOP);
135 	intel_ring_advance(ring);
136 
137 	return 0;
138 }
139 
140 /**
141  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
142  * implementing two workarounds on gen6.  From section 1.4.7.1
143  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
144  *
145  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
146  * produced by non-pipelined state commands), software needs to first
147  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
148  * 0.
149  *
150  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
151  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
152  *
153  * And the workaround for these two requires this workaround first:
154  *
155  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
156  * BEFORE the pipe-control with a post-sync op and no write-cache
157  * flushes.
158  *
159  * And this last workaround is tricky because of the requirements on
160  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
161  * volume 2 part 1:
162  *
163  *     "1 of the following must also be set:
164  *      - Render Target Cache Flush Enable ([12] of DW1)
165  *      - Depth Cache Flush Enable ([0] of DW1)
166  *      - Stall at Pixel Scoreboard ([1] of DW1)
167  *      - Depth Stall ([13] of DW1)
168  *      - Post-Sync Operation ([13] of DW1)
169  *      - Notify Enable ([8] of DW1)"
170  *
171  * The cache flushes require the workaround flush that triggered this
172  * one, so we can't use it.  Depth stall would trigger the same.
173  * Post-sync nonzero is what triggered this second workaround, so we
174  * can't use that one either.  Notify enable is IRQs, which aren't
175  * really our business.  That leaves only stall at scoreboard.
176  */
177 static int
178 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
179 {
180 	struct pipe_control *pc = ring->private;
181 	u32 scratch_addr = pc->gtt_offset + 128;
182 	int ret;
183 
185 	ret = intel_ring_begin(ring, 6);
186 	if (ret)
187 		return ret;
188 
189 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
190 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
191 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
192 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
193 	intel_ring_emit(ring, 0); /* low dword */
194 	intel_ring_emit(ring, 0); /* high dword */
195 	intel_ring_emit(ring, MI_NOOP);
196 	intel_ring_advance(ring);
197 
198 	ret = intel_ring_begin(ring, 6);
199 	if (ret)
200 		return ret;
201 
202 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
203 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
204 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
205 	intel_ring_emit(ring, 0);
206 	intel_ring_emit(ring, 0);
207 	intel_ring_emit(ring, MI_NOOP);
208 	intel_ring_advance(ring);
209 
210 	return 0;
211 }
212 
213 static int
214 gen6_render_ring_flush(struct intel_ring_buffer *ring,
215                          u32 invalidate_domains, u32 flush_domains)
216 {
217 	u32 flags = 0;
218 	struct pipe_control *pc = ring->private;
219 	u32 scratch_addr = pc->gtt_offset + 128;
220 	int ret;
221 
222 	/* Force SNB workarounds for PIPE_CONTROL flushes */
223 	ret = intel_emit_post_sync_nonzero_flush(ring);
224 	if (ret)
225 		return ret;
226 
227 	/* Just flush everything.  Experiments have shown that reducing the
228 	 * number of bits based on the write domains has little performance
229 	 * impact.
230 	 */
231 	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
232 	flags |= PIPE_CONTROL_TLB_INVALIDATE;
233 	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
234 	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
235 	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
236 	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
237 	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
238 	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
239 	/*
240 	 * Ensure that any following seqno writes only happen when the render
241 	 * cache is indeed flushed (but only if the caller actually wants that).
242 	 */
243 	if (flush_domains)
244 		flags |= PIPE_CONTROL_CS_STALL;
245 
246 	ret = intel_ring_begin(ring, 6);
247 	if (ret)
248 		return ret;
249 
250 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
251 	intel_ring_emit(ring, flags);
252 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
253 	intel_ring_emit(ring, 0); /* lower dword */
254 	intel_ring_emit(ring, 0); /* upper dword */
255 	intel_ring_emit(ring, MI_NOOP);
256 	intel_ring_advance(ring);
257 
258 	return 0;
259 }
260 
261 static void ring_write_tail(struct intel_ring_buffer *ring,
262 			    uint32_t value)
263 {
264 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
265 	I915_WRITE_TAIL(ring, value);
266 }
267 
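/*
 * Read the Active Head pointer for this ring: gen4+ parts have a per-ring
 * ACTHD register at the ring's mmio base, older parts only provide the
 * single global ACTHD register.
 */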
268 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
269 {
270 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
271 	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
272 			RING_ACTHD(ring->mmio_base) : ACTHD;
273 
274 	return I915_READ(acthd_reg);
275 }
276 
277 static int init_ring_common(struct intel_ring_buffer *ring)
278 {
279 	struct drm_device *dev = ring->dev;
280 	drm_i915_private_t *dev_priv = dev->dev_private;
281 	struct drm_i915_gem_object *obj = ring->obj;
282 	int ret = 0;
283 	uint32_t head;
284 
285 	if (HAS_FORCE_WAKE(dev))
286 		gen6_gt_force_wake_get(dev_priv);
287 
288 	/* Stop the ring if it's running. */
289 	I915_WRITE_CTL(ring, 0);
290 	I915_WRITE_HEAD(ring, 0);
291 	ring->write_tail(ring, 0);
292 
293 	/* Initialize the ring. */
294 	I915_WRITE_START(ring, obj->gtt_offset);
295 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
296 
297 	/* G45 ring initialization fails to reset head to zero */
298 	if (head != 0) {
299 		DRM_DEBUG("%s head not reset to zero "
300 			      "ctl %08x head %08x tail %08x start %08x\n",
301 			      ring->name,
302 			      I915_READ_CTL(ring),
303 			      I915_READ_HEAD(ring),
304 			      I915_READ_TAIL(ring),
305 			      I915_READ_START(ring));
306 
307 		I915_WRITE_HEAD(ring, 0);
308 
309 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
310 			DRM_ERROR("failed to set %s head to zero "
311 				  "ctl %08x head %08x tail %08x start %08x\n",
312 				  ring->name,
313 				  I915_READ_CTL(ring),
314 				  I915_READ_HEAD(ring),
315 				  I915_READ_TAIL(ring),
316 				  I915_READ_START(ring));
317 		}
318 	}
319 
320 	I915_WRITE_CTL(ring,
321 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
322 			| RING_VALID);
323 
324 	/* If the head is still not zero, the ring is dead */
325 	if (_intel_wait_for(ring->dev,
326 	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
327 	     I915_READ_START(ring) == obj->gtt_offset &&
328 	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
329 	    50, 1, "915rii")) {
330 		DRM_ERROR("%s initialization failed "
331 				"ctl %08x head %08x tail %08x start %08x\n",
332 				ring->name,
333 				I915_READ_CTL(ring),
334 				I915_READ_HEAD(ring),
335 				I915_READ_TAIL(ring),
336 				I915_READ_START(ring));
337 		ret = -EIO;
338 		goto out;
339 	}
340 
341 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
342 		i915_kernel_lost_context(ring->dev);
343 	else {
344 		ring->head = I915_READ_HEAD(ring);
345 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
346 		ring->space = ring_space(ring);
347 		ring->last_retired_head = -1;
348 	}
349 
350 out:
351 	if (HAS_FORCE_WAKE(dev))
352 		gen6_gt_force_wake_put(dev_priv);
353 
354 	return ret;
355 }
356 
357 static int
358 init_pipe_control(struct intel_ring_buffer *ring)
359 {
360 	struct pipe_control *pc;
361 	struct drm_i915_gem_object *obj;
362 	int ret;
363 
364 	if (ring->private)
365 		return 0;
366 
367 	pc = kmalloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
368 	if (!pc)
369 		return -ENOMEM;
370 
371 	obj = i915_gem_alloc_object(ring->dev, 4096);
372 	if (obj == NULL) {
373 		DRM_ERROR("Failed to allocate seqno page\n");
374 		ret = -ENOMEM;
375 		goto err;
376 	}
377 
378 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
379 
380 	ret = i915_gem_object_pin(obj, 4096, true);
381 	if (ret)
382 		goto err_unref;
383 
384 	pc->gtt_offset = obj->gtt_offset;
385 	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
386 	if (pc->cpu_page == NULL) {
387 		ret = -ENOMEM;
		goto err_unpin;
	}
388 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
389 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
390 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
391 
392 	pc->obj = obj;
393 	ring->private = pc;
394 	return 0;
395 
396 err_unpin:
397 	i915_gem_object_unpin(obj);
398 err_unref:
399 	drm_gem_object_unreference(&obj->base);
400 err:
401 	drm_free(pc, DRM_I915_GEM);
402 	return ret;
403 }
404 
405 static void
406 cleanup_pipe_control(struct intel_ring_buffer *ring)
407 {
408 	struct pipe_control *pc = ring->private;
409 	struct drm_i915_gem_object *obj;
410 
411 	if (!ring->private)
412 		return;
413 
414 	obj = pc->obj;
415 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
416 	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
417 	i915_gem_object_unpin(obj);
418 	drm_gem_object_unreference(&obj->base);
419 
420 	drm_free(pc, DRM_I915_GEM);
421 	ring->private = NULL;
422 }
423 
424 static int init_render_ring(struct intel_ring_buffer *ring)
425 {
426 	struct drm_device *dev = ring->dev;
427 	struct drm_i915_private *dev_priv = dev->dev_private;
428 	int ret = init_ring_common(ring);
429 
430 	if (INTEL_INFO(dev)->gen > 3)
431 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
432 
433 	/* We need to disable the AsyncFlip performance optimisations in order
434 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
435 	 * programmed to '1' on all products.
436 	 */
437 	if (INTEL_INFO(dev)->gen >= 6)
438 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
439 
440 	/* Required for the hardware to program scanline values for waiting */
441 	if (INTEL_INFO(dev)->gen == 6)
442 		I915_WRITE(GFX_MODE,
443 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
444 
445 	if (IS_GEN7(dev))
446 		I915_WRITE(GFX_MODE_GEN7,
447 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
448 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
449 
450 	if (INTEL_INFO(dev)->gen >= 5) {
451 		ret = init_pipe_control(ring);
452 		if (ret)
453 			return ret;
454 	}
455 
456 	if (IS_GEN6(dev)) {
457 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
458 		 * "If this bit is set, STCunit will have LRA as replacement
459 		 *  policy. [...] This bit must be reset.  LRA replacement
460 		 *  policy is not supported."
461 		 */
462 		I915_WRITE(CACHE_MODE_0,
463 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
464 
465 		/* This is not explicitly set for GEN6, so read the register.
466 		 * see intel_ring_mi_set_context() for why we care.
467 		 * TODO: consider explicitly setting the bit for GEN5
468 		 */
469 		ring->itlb_before_ctx_switch =
470 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
471 	}
472 
473 	if (INTEL_INFO(dev)->gen >= 6)
474 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
475 
476 	if (HAS_L3_GPU_CACHE(dev))
477 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
478 
479 	return ret;
480 }
481 
482 static void render_ring_cleanup(struct intel_ring_buffer *ring)
483 {
484 	if (!ring->private)
485 		return;
486 
487 	cleanup_pipe_control(ring);
488 }
489 
490 static void
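/*
 * Emit a semaphore mailbox update: write @seqno into the mailbox register
 * at @mmio_offset so another ring can wait on it with MI_SEMAPHORE_MBOX.
 */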
491 update_mboxes(struct intel_ring_buffer *ring,
492 	    u32 seqno,
493 	    u32 mmio_offset)
494 {
495 	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
496 			      MI_SEMAPHORE_GLOBAL_GTT |
497 			      MI_SEMAPHORE_REGISTER |
498 			      MI_SEMAPHORE_UPDATE);
499 	intel_ring_emit(ring, seqno);
500 	intel_ring_emit(ring, mmio_offset);
501 }
502 
503 /**
504  * gen6_add_request - Update the semaphore mailbox registers
505  *
506  * @ring - ring that is adding a request
507  * @seqno - return seqno stuck into the ring
508  *
509  * Update the mailbox registers in the *other* rings with the current seqno.
510  * This acts like a signal in the canonical semaphore.
511  */
512 static int
513 gen6_add_request(struct intel_ring_buffer *ring,
514 		 u32 *seqno)
515 {
516 	u32 mbox1_reg;
517 	u32 mbox2_reg;
518 	int ret;
519 
520 	ret = intel_ring_begin(ring, 10);
521 	if (ret)
522 		return ret;
523 
524 	mbox1_reg = ring->signal_mbox[0];
525 	mbox2_reg = ring->signal_mbox[1];
526 
527 	*seqno = i915_gem_next_request_seqno(ring);
528 
529 	update_mboxes(ring, *seqno, mbox1_reg);
530 	update_mboxes(ring, *seqno, mbox2_reg);
531 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
532 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
533 	intel_ring_emit(ring, *seqno);
534 	intel_ring_emit(ring, MI_USER_INTERRUPT);
535 	intel_ring_advance(ring);
536 
537 	return 0;
538 }
539 
540 /**
541  * gen6_ring_sync - sync the waiter to the signaller on seqno
542  *
543  * @waiter - ring that is waiting
544  * @signaller - ring which has, or will signal
545  * @seqno - seqno which the waiter will block on
546  */
547 static int
548 gen6_ring_sync(struct intel_ring_buffer *waiter,
549 	       struct intel_ring_buffer *signaller,
550 	       u32 seqno)
551 {
552 	int ret;
553 	u32 dw1 = MI_SEMAPHORE_MBOX |
554 		  MI_SEMAPHORE_COMPARE |
555 		  MI_SEMAPHORE_REGISTER;
556 
557 	/* Throughout all of the GEM code, seqno passed implies our current
558 	 * seqno is >= the last seqno executed. However for hardware the
559 	 * comparison is strictly greater than.
560 	 */
561 	seqno -= 1;
562 
563 	WARN_ON(signaller->semaphore_register[waiter->id] ==
564 		MI_SEMAPHORE_SYNC_INVALID);
565 
566 	ret = intel_ring_begin(waiter, 4);
567 	if (ret)
568 		return ret;
569 
570 	intel_ring_emit(waiter,
571 			dw1 | signaller->semaphore_register[waiter->id]);
572 	intel_ring_emit(waiter, seqno);
573 	intel_ring_emit(waiter, 0);
574 	intel_ring_emit(waiter, MI_NOOP);
575 	intel_ring_advance(waiter);
576 
577 	return 0;
578 }
579 
580 int render_ring_sync_to(struct intel_ring_buffer *waiter,
581     struct intel_ring_buffer *signaller, u32 seqno);
582 int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
583     struct intel_ring_buffer *signaller, u32 seqno);
584 int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
585     struct intel_ring_buffer *signaller, u32 seqno);
586 
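/*
 * Emit a 4-dword PIPE_CONTROL that stalls on depth and performs a qword
 * write to addr__; used below to scrub the scratch cachelines before the
 * final PIPE_CONTROL_NOTIFY on Ironlake.
 */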
587 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
588 do {									\
589 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
590 		 PIPE_CONTROL_DEPTH_STALL);				\
591 	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
592 	intel_ring_emit(ring__, 0);							\
593 	intel_ring_emit(ring__, 0);							\
594 } while (0)
595 
596 static int
597 pc_render_add_request(struct intel_ring_buffer *ring,
598 		      uint32_t *result)
599 {
600 	u32 seqno = i915_gem_next_request_seqno(ring);
601 	struct pipe_control *pc = ring->private;
602 	u32 scratch_addr = pc->gtt_offset + 128;
603 	int ret;
604 
605 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
606 	 * incoherent with writes to memory, i.e. completely fubar,
607 	 * so we need to use PIPE_NOTIFY instead.
608 	 *
609 	 * However, we also need to workaround the qword write
610 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
611 	 * memory before requesting an interrupt.
612 	 */
613 	ret = intel_ring_begin(ring, 32);
614 	if (ret)
615 		return ret;
616 
617 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
618 			PIPE_CONTROL_WRITE_FLUSH |
619 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
620 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
621 	intel_ring_emit(ring, seqno);
622 	intel_ring_emit(ring, 0);
623 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
624 	scratch_addr += 128; /* write to separate cachelines */
625 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
626 	scratch_addr += 128;
627 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
628 	scratch_addr += 128;
629 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
630 	scratch_addr += 128;
631 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
632 	scratch_addr += 128;
633 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
634 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
635 			PIPE_CONTROL_WRITE_FLUSH |
636 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
637 			PIPE_CONTROL_NOTIFY);
638 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
639 	intel_ring_emit(ring, seqno);
640 	intel_ring_emit(ring, 0);
641 	intel_ring_advance(ring);
642 
643 	*result = seqno;
644 	return 0;
645 }
646 
647 static u32
648 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
649 {
650 	/* Workaround to force correct ordering between irq and seqno writes on
651 	 * ivb (and maybe also on snb) by reading from a CS register (like
652 	 * ACTHD) before reading the status page. */
653 	if (!lazy_coherency)
654 		intel_ring_get_active_head(ring);
655 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
656 }
657 
658 static u32
659 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
660 {
661 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
662 }
663 
664 static u32
665 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
666 {
667 	struct pipe_control *pc = ring->private;
668 	return pc->cpu_page[0];
669 }
670 
671 static bool
672 gen5_ring_get_irq(struct intel_ring_buffer *ring)
673 {
674 	struct drm_device *dev = ring->dev;
675 	drm_i915_private_t *dev_priv = dev->dev_private;
676 
677 	if (!dev->irq_enabled)
678 		return false;
679 
680 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
681 	if (ring->irq_refcount++ == 0) {
682 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
683 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
684 		POSTING_READ(GTIMR);
685 	}
686 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
687 
688 	return true;
689 }
690 
691 static void
692 gen5_ring_put_irq(struct intel_ring_buffer *ring)
693 {
694 	struct drm_device *dev = ring->dev;
695 	drm_i915_private_t *dev_priv = dev->dev_private;
696 
697 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
698 	if (--ring->irq_refcount == 0) {
699 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
700 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
701 		POSTING_READ(GTIMR);
702 	}
703 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
704 }
705 
706 static bool
707 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
708 {
709 	struct drm_device *dev = ring->dev;
710 	drm_i915_private_t *dev_priv = dev->dev_private;
711 
712 	if (!dev->irq_enabled)
713 		return false;
714 
715 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
716 	if (ring->irq_refcount++ == 0) {
717 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
718 		I915_WRITE(IMR, dev_priv->irq_mask);
719 		POSTING_READ(IMR);
720 	}
721 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
722 
723 	return true;
724 }
725 
726 static void
727 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
728 {
729 	struct drm_device *dev = ring->dev;
730 	drm_i915_private_t *dev_priv = dev->dev_private;
731 
732 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
733 	if (--ring->irq_refcount == 0) {
734 		dev_priv->irq_mask |= ring->irq_enable_mask;
735 		I915_WRITE(IMR, dev_priv->irq_mask);
736 		POSTING_READ(IMR);
737 	}
738 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
739 }
740 
741 static bool
742 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
743 {
744 	struct drm_device *dev = ring->dev;
745 	drm_i915_private_t *dev_priv = dev->dev_private;
746 
747 	if (!dev->irq_enabled)
748 		return false;
749 
750 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
751 	if (ring->irq_refcount++ == 0) {
752 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
753 		I915_WRITE16(IMR, dev_priv->irq_mask);
754 		POSTING_READ16(IMR);
755 	}
756 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
757 
758 	return true;
759 }
760 
761 static void
762 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
763 {
764 	struct drm_device *dev = ring->dev;
765 	drm_i915_private_t *dev_priv = dev->dev_private;
766 
767 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
768 	if (--ring->irq_refcount == 0) {
769 		dev_priv->irq_mask |= ring->irq_enable_mask;
770 		I915_WRITE16(IMR, dev_priv->irq_mask);
771 		POSTING_READ16(IMR);
772 	}
773 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
774 }
775 
776 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
777 {
778 	struct drm_device *dev = ring->dev;
779 	drm_i915_private_t *dev_priv = dev->dev_private;
780 	uint32_t mmio = 0;
781 
782 	/* The ring status page addresses are no longer next to the rest of
783 	 * the ring registers as of gen7.
784 	 */
785 	if (IS_GEN7(dev)) {
786 		switch (ring->id) {
787 		case RCS:
788 			mmio = RENDER_HWS_PGA_GEN7;
789 			break;
790 		case BCS:
791 			mmio = BLT_HWS_PGA_GEN7;
792 			break;
793 		case VCS:
794 			mmio = BSD_HWS_PGA_GEN7;
795 			break;
796 		}
797 	} else if (IS_GEN6(dev)) {
798 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
799 	} else {
800 		mmio = RING_HWS_PGA(ring->mmio_base);
801 	}
802 
803 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
804 	POSTING_READ(mmio);
805 }
806 
807 static int
808 bsd_ring_flush(struct intel_ring_buffer *ring,
809 	       uint32_t     invalidate_domains,
810 	       uint32_t     flush_domains)
811 {
812 	int ret;
813 
814 	ret = intel_ring_begin(ring, 2);
815 	if (ret)
816 		return ret;
817 
818 	intel_ring_emit(ring, MI_FLUSH);
819 	intel_ring_emit(ring, MI_NOOP);
820 	intel_ring_advance(ring);
821 	return 0;
822 }
823 
824 static int
825 i9xx_add_request(struct intel_ring_buffer *ring,
826 		 u32 *result)
827 {
828 	uint32_t seqno;
829 	int ret;
830 
831 	ret = intel_ring_begin(ring, 4);
832 	if (ret)
833 		return ret;
834 
835 	seqno = i915_gem_next_request_seqno(ring);
836 
837 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
838 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
839 	intel_ring_emit(ring, seqno);
840 	intel_ring_emit(ring, MI_USER_INTERRUPT);
841 	intel_ring_advance(ring);
842 
843 	*result = seqno;
844 	return 0;
845 }
846 
847 static bool
848 gen6_ring_get_irq(struct intel_ring_buffer *ring)
849 {
850 	struct drm_device *dev = ring->dev;
851 	drm_i915_private_t *dev_priv = dev->dev_private;
852 
853 	if (!dev->irq_enabled)
854 		return false;
855 
856 	/* It looks like we need to prevent the gt from suspending while waiting
857 	 * for a notify irq, otherwise irqs seem to get lost on at least the
858 	 * blt/bsd rings on ivb. */
859 	gen6_gt_force_wake_get(dev_priv);
860 
861 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
862 	if (ring->irq_refcount++ == 0) {
863 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
864 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
865 						GEN6_RENDER_L3_PARITY_ERROR));
866 		else
867 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
868 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
869 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
870 		POSTING_READ(GTIMR);
871 	}
872 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
873 
874 	return true;
875 }
876 
877 static void
878 gen6_ring_put_irq(struct intel_ring_buffer *ring)
879 {
880 	struct drm_device *dev = ring->dev;
881 	drm_i915_private_t *dev_priv = dev->dev_private;
882 
883 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
884 	if (--ring->irq_refcount == 0) {
885 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
886 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
887 		else
888 			I915_WRITE_IMR(ring, ~0);
889 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
890 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
891 		POSTING_READ(GTIMR);
892 	}
893 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
894 
895 	gen6_gt_force_wake_put(dev_priv);
896 }
897 
898 static int
899 i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
900 {
901 	int ret;
902 
903 	ret = intel_ring_begin(ring, 2);
904 	if (ret)
905 		return ret;
906 
907 	intel_ring_emit(ring,
908 			MI_BATCH_BUFFER_START |
909 			MI_BATCH_NON_SECURE_I965);
910 	intel_ring_emit(ring, offset);
911 	intel_ring_advance(ring);
912 
913 	return 0;
914 }
915 
916 static int
917 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
918 				u32 offset, u32 len)
919 {
920 	int ret;
921 
922 	ret = intel_ring_begin(ring, 4);
923 	if (ret)
924 		return ret;
925 
926 	intel_ring_emit(ring, MI_BATCH_BUFFER);
927 	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
928 	intel_ring_emit(ring, offset + len - 8);
929 	intel_ring_emit(ring, 0);
930 	intel_ring_advance(ring);
931 
932 	return 0;
933 }
934 
935 static int
936 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
937 			 u32 offset, u32 len)
938 {
939 	int ret;
940 	unsigned flags = 0;
941 
942 	ret = intel_ring_begin(ring, 2);
943 	if (ret)
944 		return ret;
945 
946 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
947 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
948 	intel_ring_advance(ring);
949 
950 	return 0;
951 }
952 
953 static void cleanup_status_page(struct intel_ring_buffer *ring)
954 {
955 	struct drm_i915_gem_object *obj;
956 
957 	obj = ring->status_page.obj;
958 	if (obj == NULL)
959 		return;
960 
961 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
962 	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
963 	    PAGE_SIZE);
964 	i915_gem_object_unpin(obj);
965 	drm_gem_object_unreference(&obj->base);
966 	ring->status_page.obj = NULL;
967 }
968 
969 static int init_status_page(struct intel_ring_buffer *ring)
970 {
971 	struct drm_device *dev = ring->dev;
972 	struct drm_i915_gem_object *obj;
973 	int ret;
974 
975 	obj = i915_gem_alloc_object(dev, 4096);
976 	if (obj == NULL) {
977 		DRM_ERROR("Failed to allocate status page\n");
978 		ret = -ENOMEM;
979 		goto err;
980 	}
981 
982 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
983 
984 	ret = i915_gem_object_pin(obj, 4096, true);
985 	if (ret != 0) {
986 		goto err_unref;
987 	}
988 
989 	ring->status_page.gfx_addr = obj->gtt_offset;
990 	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
991 	    PAGE_SIZE, PAGE_SIZE);
992 	if (ring->status_page.page_addr == NULL) {
993 		ret = -ENOMEM;
994 		goto err_unpin;
995 	}
996 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
997 	    1);
998 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
999 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1000 	ring->status_page.obj = obj;
1001 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1002 
1003 	intel_ring_setup_status_page(ring);
1004 	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1005 			ring->name, ring->status_page.gfx_addr);
1006 
1007 	return 0;
1008 
1009 err_unpin:
1010 	i915_gem_object_unpin(obj);
1011 err_unref:
1012 	drm_gem_object_unreference(&obj->base);
1013 err:
1014 	return ret;
1015 }
1016 
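/*
 * Set up a physically addressed hardware status page for rings without a
 * GTT-mapped one: allocate a DMA page once per device and program its bus
 * address into HWS_PGA.
 */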
1017 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1018 {
1019 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1020 	u32 addr;
1021 
1022 	if (!dev_priv->status_page_dmah) {
1023 		dev_priv->status_page_dmah =
1024 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, ~0);
1025 		if (!dev_priv->status_page_dmah)
1026 			return -ENOMEM;
1027 	}
1028 
1029 	addr = dev_priv->status_page_dmah->busaddr;
1030 	if (INTEL_INFO(ring->dev)->gen >= 4)
1031 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1032 	I915_WRITE(HWS_PGA, addr);
1033 
1034 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1035 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1036 
1037 	return 0;
1038 }
1039 
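/* Map a physical range with write-combining attributes (ioremap_wc shim). */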
1040 static inline void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
1041 {
1042 	return pmap_mapdev_attr(phys_addr, size, VM_MEMATTR_WRITE_COMBINING);
1043 }
1044 
1045 static int intel_init_ring_buffer(struct drm_device *dev,
1046 				  struct intel_ring_buffer *ring)
1047 {
1048 	struct drm_i915_gem_object *obj;
1049 	int ret;
1050 
1051 	ring->dev = dev;
1052 	INIT_LIST_HEAD(&ring->active_list);
1053 	INIT_LIST_HEAD(&ring->request_list);
1054 	ring->size = 32 * PAGE_SIZE;
1055 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1056 
1057 	init_waitqueue_head(&ring->irq_queue);
1058 
1059 	if (I915_NEED_GFX_HWS(dev)) {
1060 		ret = init_status_page(ring);
1061 		if (ret)
1062 			return ret;
1063 	} else {
1064 		BUG_ON(ring->id != RCS);
1065 		ret = init_phys_hws_pga(ring);
1066 		if (ret)
1067 			return ret;
1068 	}
1069 
1070 	obj = i915_gem_alloc_object(dev, ring->size);
1071 	if (obj == NULL) {
1072 		DRM_ERROR("Failed to allocate ringbuffer\n");
1073 		ret = -ENOMEM;
1074 		goto err_hws;
1075 	}
1076 
1077 	ring->obj = obj;
1078 
1079 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1080 	if (ret)
1081 		goto err_unref;
1082 
1083 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1084 	if (ret)
1085 		goto err_unpin;
1086 
1087 	ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset,
1088 					 ring->size);
1089 	if (ring->virtual_start == NULL) {
1090 		DRM_ERROR("Failed to map ringbuffer.\n");
1091 		ret = -EINVAL;
1092 		goto err_unpin;
1093 	}
1094 
1095 	ret = ring->init(ring);
1096 	if (ret)
1097 		goto err_unmap;
1098 
1099 	/* Workaround an erratum on the i830 which causes a hang if
1100 	 * the TAIL pointer points to within the last 2 cachelines
1101 	 * of the buffer.
1102 	 */
1103 	ring->effective_size = ring->size;
1104 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1105 		ring->effective_size -= 128;
1106 
1107 	return 0;
1108 
1109 err_unmap:
1110 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1111 err_unpin:
1112 	i915_gem_object_unpin(obj);
1113 err_unref:
1114 	drm_gem_object_unreference(&obj->base);
1115 	ring->obj = NULL;
1116 err_hws:
1117 	cleanup_status_page(ring);
1118 	return ret;
1119 }
1120 
1121 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1122 {
1123 	struct drm_i915_private *dev_priv;
1124 	int ret;
1125 
1126 	if (ring->obj == NULL)
1127 		return;
1128 
1129 	/* Disable the ring buffer. The ring must be idle at this point */
1130 	dev_priv = ring->dev->dev_private;
1131 	ret = intel_ring_idle(ring);
1132 	if (ret)
1133 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1134 			  ring->name, ret);
1135 
1136 	I915_WRITE_CTL(ring, 0);
1137 
1138 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1139 
1140 	i915_gem_object_unpin(ring->obj);
1141 	drm_gem_object_unreference(&ring->obj->base);
1142 	ring->obj = NULL;
1143 
1144 	if (ring->cleanup)
1145 		ring->cleanup(ring);
1146 
1147 	cleanup_status_page(ring);
1148 }
1149 
1150 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1151 {
1152 	int ret;
1153 
1154 	ret = i915_wait_seqno(ring, seqno);
1155 	if (!ret)
1156 		i915_gem_retire_requests_ring(ring);
1157 
1158 	return ret;
1159 }
1160 
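/*
 * Reclaim ring space by retiring completed requests; if that is not enough,
 * wait for the oldest request whose retirement frees at least @n bytes and
 * then pick up the new head from last_retired_head.
 */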
1161 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1162 {
1163 	struct drm_i915_gem_request *request;
1164 	u32 seqno = 0;
1165 	int ret;
1166 
1167 	i915_gem_retire_requests_ring(ring);
1168 
1169 	if (ring->last_retired_head != -1) {
1170 		ring->head = ring->last_retired_head;
1171 		ring->last_retired_head = -1;
1172 		ring->space = ring_space(ring);
1173 		if (ring->space >= n)
1174 			return 0;
1175 	}
1176 
1177 	list_for_each_entry(request, &ring->request_list, list) {
1178 		int space;
1179 
1180 		if (request->tail == -1)
1181 			continue;
1182 
1183 		space = request->tail - (ring->tail + 8);
1184 		if (space < 0)
1185 			space += ring->size;
1186 		if (space >= n) {
1187 			seqno = request->seqno;
1188 			break;
1189 		}
1190 
1191 		/* Consume this request in case we need more space than
1192 		 * is available and so need to prevent a race between
1193 		 * updating last_retired_head and direct reads of
1194 		 * I915_RING_HEAD. It also provides a nice sanity check.
1195 		 */
1196 		request->tail = -1;
1197 	}
1198 
1199 	if (seqno == 0)
1200 		return -ENOSPC;
1201 
1202 	ret = intel_ring_wait_seqno(ring, seqno);
1203 	if (ret)
1204 		return ret;
1205 
1206 	if (ring->last_retired_head == -1)
1207 		return -ENOSPC;
1208 
1209 	ring->head = ring->last_retired_head;
1210 	ring->last_retired_head = -1;
1211 	ring->space = ring_space(ring);
1212 	if (ring->space < n)
1213 		return -ENOSPC;
1214 
1215 	return 0;
1216 }
1217 
1218 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1219 {
1220 	struct drm_device *dev = ring->dev;
1221 	struct drm_i915_private *dev_priv = dev->dev_private;
1222 	unsigned long end;
1223 	int ret;
1224 
1225 	ret = intel_ring_wait_request(ring, n);
1226 	if (ret != -ENOSPC)
1227 		return ret;
1228 
1229 	/* With GEM the hangcheck timer should kick us out of the loop;
1230 	 * leaving it early runs the risk of corrupting GEM state (due
1231 	 * to running on almost untested codepaths). But on resume
1232 	 * timers don't work yet, so prevent a complete hang in that
1233 	 * case by choosing an insanely large timeout. */
1234 	end = ticks + 60 * hz;
1235 
1236 	do {
1237 		ring->head = I915_READ_HEAD(ring);
1238 		ring->space = ring_space(ring);
1239 		if (ring->space >= n) {
1240 			return 0;
1241 		}
1242 
1243 #if 0
1244 		if (dev->primary->master) {
1245 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1246 			if (master_priv->sarea_priv)
1247 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1248 		}
1249 #else
1250 		if (dev_priv->sarea_priv)
1251 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1252 #endif
1253 
1254 		DELAY(1000);
1255 
1256 		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1257 		if (ret)
1258 			return ret;
1259 	} while (!time_after(ticks, end));
1260 	return -EBUSY;
1261 }
1262 
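/*
 * Wrap back to the start of the ring: wait until the remaining bytes up to
 * the end of the buffer are free, fill them with MI_NOOPs and reset the tail.
 */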
1263 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1264 {
1265 	uint32_t __iomem *virt;
1266 	int rem = ring->size - ring->tail;
1267 
1268 	if (ring->space < rem) {
1269 		int ret = ring_wait_for_space(ring, rem);
1270 		if (ret)
1271 			return ret;
1272 	}
1273 
1274 	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1275 	rem /= 4;
1276 	while (rem--)
1277 		iowrite32(MI_NOOP, virt++);
1278 
1279 	ring->tail = 0;
1280 	ring->space = ring_space(ring);
1281 
1282 	return 0;
1283 }
1284 
1285 int intel_ring_idle(struct intel_ring_buffer *ring)
1286 {
1287 	return ring_wait_for_space(ring, ring->size - 8);
1288 }
1289 
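/*
 * Reserve space for @num_dwords dwords of commands: bail out if the GPU is
 * wedged, wrap the ring if the request would cross effective_size, and wait
 * for free space when necessary.
 */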
1290 int intel_ring_begin(struct intel_ring_buffer *ring,
1291 		     int num_dwords)
1292 {
1293 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1294 	int n = 4*num_dwords;
1295 	int ret;
1296 
1297 	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1298 	if (ret)
1299 		return ret;
1300 
1301 	if (unlikely(ring->tail + n > ring->effective_size)) {
1302 		ret = intel_wrap_ring_buffer(ring);
1303 		if (unlikely(ret))
1304 			return ret;
1305 	}
1306 
1307 	if (unlikely(ring->space < n)) {
1308 		ret = ring_wait_for_space(ring, n);
1309 		if (unlikely(ret))
1310 			return ret;
1311 	}
1312 
1313 	ring->space -= n;
1314 	return 0;
1315 }
1316 
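/*
 * Commit the emitted commands by writing the new tail to the hardware,
 * unless this ring has been stopped via dev_priv->stop_rings (e.g. when
 * simulating a hang).
 */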
1317 void intel_ring_advance(struct intel_ring_buffer *ring)
1318 {
1319 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1320 
1321 	ring->tail &= ring->size - 1;
1322 	if (dev_priv->stop_rings & intel_ring_flag(ring))
1323 		return;
1324 	ring->write_tail(ring, ring->tail);
1325 }
1326 
1327 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1328 				     u32 value)
1329 {
1330 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1331 
1332 	/* Every tail move must follow the sequence below */
1333 
1334 	/* Disable notification that the ring is IDLE. The GT
1335 	 * will then assume that it is busy and bring it out of rc6.
1336 	 */
1337 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1338 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1339 
1340 	/* Clear the context id. Here be magic! */
1341 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1342 
1343 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1344 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1345 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
1346 		     50))
1347 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1348 
1349 	/* Now that the ring is fully powered up, update the tail */
1350 	I915_WRITE_TAIL(ring, value);
1351 	POSTING_READ(RING_TAIL(ring->mmio_base));
1352 
1353 	/* Let the ring send IDLE messages to the GT again,
1354 	 * and so let it sleep to conserve power when idle.
1355 	 */
1356 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1357 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1358 }
1359 
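/*
 * MI_FLUSH_DW based flush for the gen6+ BSD ring; request TLB and BSD
 * invalidation whenever any GPU domain is to be invalidated.
 */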
1360 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1361 			   uint32_t invalidate, uint32_t flush)
1362 {
1363 	uint32_t cmd;
1364 	int ret;
1365 
1366 	ret = intel_ring_begin(ring, 4);
1367 	if (ret)
1368 		return ret;
1369 
1370 	cmd = MI_FLUSH_DW;
1371 	if (invalidate & I915_GEM_GPU_DOMAINS)
1372 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
1373 	intel_ring_emit(ring, cmd);
1374 	intel_ring_emit(ring, 0);
1375 	intel_ring_emit(ring, 0);
1376 	intel_ring_emit(ring, MI_NOOP);
1377 	intel_ring_advance(ring);
1378 	return 0;
1379 }
1380 
1381 static int
1382 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1383 			      uint32_t offset, uint32_t len)
1384 {
1385 	int ret;
1386 
1387 	ret = intel_ring_begin(ring, 2);
1388 	if (ret)
1389 		return ret;
1390 
1391 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1392 	/* bits 0-7 are the length on GEN6+ */
1393 	intel_ring_emit(ring, offset);
1394 	intel_ring_advance(ring);
1395 
1396 	return 0;
1397 }
1398 
1399 /* Blitter support (SandyBridge+) */
1400 
1401 static int blt_ring_flush(struct intel_ring_buffer *ring,
1402 			  uint32_t invalidate, uint32_t flush)
1403 {
1404 	uint32_t cmd;
1405 	int ret;
1406 
1407 	ret = intel_ring_begin(ring, 4);
1408 	if (ret)
1409 		return ret;
1410 
1411 	cmd = MI_FLUSH_DW;
1412 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1413 		cmd |= MI_INVALIDATE_TLB;
1414 	intel_ring_emit(ring, cmd);
1415 	intel_ring_emit(ring, 0);
1416 	intel_ring_emit(ring, 0);
1417 	intel_ring_emit(ring, MI_NOOP);
1418 	intel_ring_advance(ring);
1419 	return 0;
1420 }
1421 
1422 int intel_init_render_ring_buffer(struct drm_device *dev)
1423 {
1424 	drm_i915_private_t *dev_priv = dev->dev_private;
1425 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1426 
1427 	ring->name = "render ring";
1428 	ring->id = RCS;
1429 	ring->mmio_base = RENDER_RING_BASE;
1430 
1431 	if (INTEL_INFO(dev)->gen >= 6) {
1432 		ring->add_request = gen6_add_request;
1433 		ring->flush = gen6_render_ring_flush;
1434 		ring->irq_get = gen6_ring_get_irq;
1435 		ring->irq_put = gen6_ring_put_irq;
1436 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1437 		ring->get_seqno = gen6_ring_get_seqno;
1438 		ring->sync_to = gen6_ring_sync;
1439 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1440 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1441 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1442 		ring->signal_mbox[0] = GEN6_VRSYNC;
1443 		ring->signal_mbox[1] = GEN6_BRSYNC;
1444 	} else if (IS_GEN5(dev)) {
1445 		ring->add_request = pc_render_add_request;
1446 		ring->flush = gen4_render_ring_flush;
1447 		ring->get_seqno = pc_render_get_seqno;
1448 		ring->irq_get = gen5_ring_get_irq;
1449 		ring->irq_put = gen5_ring_put_irq;
1450 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1451 	} else {
1452 		ring->add_request = i9xx_add_request;
1453 		if (INTEL_INFO(dev)->gen < 4)
1454 			ring->flush = gen2_render_ring_flush;
1455 		else
1456 			ring->flush = gen4_render_ring_flush;
1457 		ring->get_seqno = ring_get_seqno;
1458 		if (IS_GEN2(dev)) {
1459 			ring->irq_get = i8xx_ring_get_irq;
1460 			ring->irq_put = i8xx_ring_put_irq;
1461 		} else {
1462 			ring->irq_get = i9xx_ring_get_irq;
1463 			ring->irq_put = i9xx_ring_put_irq;
1464 		}
1465 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1466 	}
1467 	ring->write_tail = ring_write_tail;
1468 	if (INTEL_INFO(dev)->gen >= 6)
1469 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1470 	else if (INTEL_INFO(dev)->gen >= 4)
1471 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1472 	else if (IS_I830(dev) || IS_845G(dev))
1473 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1474 	else
1475 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1476 	ring->init = init_render_ring;
1477 	ring->cleanup = render_ring_cleanup;
1478 
1479 	if (!I915_NEED_GFX_HWS(dev)) {
1480 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1481 		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1482 	}
1483 
1484 	return intel_init_ring_buffer(dev, ring);
1485 }
1486 
1487 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1488 {
1489 	drm_i915_private_t *dev_priv = dev->dev_private;
1490 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1491 
1492 	ring->name = "render ring";
1493 	ring->id = RCS;
1494 	ring->mmio_base = RENDER_RING_BASE;
1495 
1496 	if (INTEL_INFO(dev)->gen >= 6) {
1497 		/* non-kms not supported on gen6+ */
1498 		return -ENODEV;
1499 	}
1500 
1501 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1502 	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1503 	 * the special gen5 functions. */
1504 	ring->add_request = i9xx_add_request;
1505 	if (INTEL_INFO(dev)->gen < 4)
1506 		ring->flush = gen2_render_ring_flush;
1507 	else
1508 		ring->flush = gen4_render_ring_flush;
1509 	ring->get_seqno = ring_get_seqno;
1510 	if (IS_GEN2(dev)) {
1511 		ring->irq_get = i8xx_ring_get_irq;
1512 		ring->irq_put = i8xx_ring_put_irq;
1513 	} else {
1514 		ring->irq_get = i9xx_ring_get_irq;
1515 		ring->irq_put = i9xx_ring_put_irq;
1516 	}
1517 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1518 	ring->write_tail = ring_write_tail;
1519 	if (INTEL_INFO(dev)->gen >= 4)
1520 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1521 	else if (IS_I830(dev) || IS_845G(dev))
1522 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1523 	else
1524 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1525 	ring->init = init_render_ring;
1526 	ring->cleanup = render_ring_cleanup;
1527 
1528 	if (!I915_NEED_GFX_HWS(dev))
1529 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1530 
1531 	ring->dev = dev;
1532 	INIT_LIST_HEAD(&ring->active_list);
1533 	INIT_LIST_HEAD(&ring->request_list);
1534 	INIT_LIST_HEAD(&ring->gpu_write_list);
1535 
1536 	ring->size = size;
1537 	ring->effective_size = ring->size;
1538 	if (IS_I830(ring->dev))
1539 		ring->effective_size -= 128;
1540 
1541 	ring->virtual_start = ioremap_wc(start, size);
1542 	if (ring->virtual_start == NULL) {
1543 		DRM_ERROR("cannot ioremap virtual address for"
1544 			  " ring buffer\n");
1545 		return -ENOMEM;
1546 	}
1547 
1548 	return 0;
1549 }
1550 
1551 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1552 {
1553 	drm_i915_private_t *dev_priv = dev->dev_private;
1554 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1555 
1556 	ring->name = "bsd ring";
1557 	ring->id = VCS;
1558 
1559 	ring->write_tail = ring_write_tail;
1560 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1561 		ring->mmio_base = GEN6_BSD_RING_BASE;
1562 		/* gen6 bsd needs a special wa for tail updates */
1563 		if (IS_GEN6(dev))
1564 			ring->write_tail = gen6_bsd_ring_write_tail;
1565 		ring->flush = gen6_ring_flush;
1566 		ring->add_request = gen6_add_request;
1567 		ring->get_seqno = gen6_ring_get_seqno;
1568 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1569 		ring->irq_get = gen6_ring_get_irq;
1570 		ring->irq_put = gen6_ring_put_irq;
1571 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1572 		ring->sync_to = gen6_ring_sync;
1573 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1574 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1575 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1576 		ring->signal_mbox[0] = GEN6_RVSYNC;
1577 		ring->signal_mbox[1] = GEN6_BVSYNC;
1578 	} else {
1579 		ring->mmio_base = BSD_RING_BASE;
1580 		ring->flush = bsd_ring_flush;
1581 		ring->add_request = i9xx_add_request;
1582 		ring->get_seqno = ring_get_seqno;
1583 		if (IS_GEN5(dev)) {
1584 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1585 			ring->irq_get = gen5_ring_get_irq;
1586 			ring->irq_put = gen5_ring_put_irq;
1587 		} else {
1588 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1589 			ring->irq_get = i9xx_ring_get_irq;
1590 			ring->irq_put = i9xx_ring_put_irq;
1591 		}
1592 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1593 	}
1594 	ring->init = init_ring_common;
1595 
1596 	return intel_init_ring_buffer(dev, ring);
1597 }
1598 
1599 int intel_init_blt_ring_buffer(struct drm_device *dev)
1600 {
1601 	drm_i915_private_t *dev_priv = dev->dev_private;
1602 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1603 
1604 	ring->name = "blitter ring";
1605 	ring->id = BCS;
1606 
1607 	ring->mmio_base = BLT_RING_BASE;
1608 	ring->write_tail = ring_write_tail;
1609 	ring->flush = blt_ring_flush;
1610 	ring->add_request = gen6_add_request;
1611 	ring->get_seqno = gen6_ring_get_seqno;
1612 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1613 	ring->irq_get = gen6_ring_get_irq;
1614 	ring->irq_put = gen6_ring_put_irq;
1615 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1616 	ring->sync_to = gen6_ring_sync;
1617 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1618 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1619 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1620 	ring->signal_mbox[0] = GEN6_RBSYNC;
1621 	ring->signal_mbox[1] = GEN6_VBSYNC;
1622 	ring->init = init_ring_common;
1623 
1624 	return intel_init_ring_buffer(dev, ring);
1625 }
1626 
1627 int
1628 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1629 {
1630 	int ret;
1631 
1632 	if (!ring->gpu_caches_dirty)
1633 		return 0;
1634 
1635 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1636 	if (ret)
1637 		return ret;
1638 
1639 	ring->gpu_caches_dirty = false;
1640 	return 0;
1641 }
1642 
1643 int
1644 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1645 {
1646 	uint32_t flush_domains;
1647 	int ret;
1648 
1649 	flush_domains = 0;
1650 	if (ring->gpu_caches_dirty)
1651 		flush_domains = I915_GEM_GPU_DOMAINS;
1652 
1653 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1654 	if (ret)
1655 		return ret;
1656 
1657 	ring->gpu_caches_dirty = false;
1658 	return 0;
1659 }
1660