xref: /dflybsd-src/sys/dev/drm/i915/intel_ringbuffer.c (revision 965b839fa3b6a8029b586326f283ad2260a4871c)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao <haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <drm/drmP.h>
31 #include "i915_drv.h"
32 #include <drm/i915_drm.h>
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 
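/*
 * Free space is the gap from the software tail back to the hardware head,
 * less I915_RING_FREE_SPACE bytes held in reserve so a completely full ring
 * can never be mistaken for an empty one (head == tail).  A negative result
 * just means the gap wraps past the end of the buffer, so add the ring size
 * back in.
 */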
36 static inline int ring_space(struct intel_ring_buffer *ring)
37 {
38 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
39 	if (space < 0)
40 		space += ring->size;
41 	return space;
42 }
43 
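/*
 * Commit the software tail to the hardware.  If this ring has been
 * deliberately stopped via gpu_error.stop_rings (a debug facility), the
 * commands stay queued in the buffer but are not submitted for execution.
 */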
44 void __intel_ring_advance(struct intel_ring_buffer *ring)
45 {
46 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
47 
48 	ring->tail &= ring->size - 1;
49 	if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
50 		return;
51 	ring->write_tail(ring, ring->tail);
52 }
53 
54 static int
55 gen2_render_ring_flush(struct intel_ring_buffer *ring,
56 		       u32	invalidate_domains,
57 		       u32	flush_domains)
58 {
59 	u32 cmd;
60 	int ret;
61 
62 	cmd = MI_FLUSH;
63 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
64 		cmd |= MI_NO_WRITE_FLUSH;
65 
66 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
67 		cmd |= MI_READ_FLUSH;
68 
69 	ret = intel_ring_begin(ring, 2);
70 	if (ret)
71 		return ret;
72 
73 	intel_ring_emit(ring, cmd);
74 	intel_ring_emit(ring, MI_NOOP);
75 	intel_ring_advance(ring);
76 
77 	return 0;
78 }
79 
80 static int
81 gen4_render_ring_flush(struct intel_ring_buffer *ring,
82 		       u32	invalidate_domains,
83 		       u32	flush_domains)
84 {
85 	struct drm_device *dev = ring->dev;
86 	u32 cmd;
87 	int ret;
88 
89 	/*
90 	 * read/write caches:
91 	 *
92 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
93 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
94 	 * also flushed at 2d versus 3d pipeline switches.
95 	 *
96 	 * read-only caches:
97 	 *
98 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
99 	 * MI_READ_FLUSH is set, and is always flushed on 965.
100 	 *
101 	 * I915_GEM_DOMAIN_COMMAND may not exist?
102 	 *
103 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
104 	 * invalidated when MI_EXE_FLUSH is set.
105 	 *
106 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
107 	 * invalidated with every MI_FLUSH.
108 	 *
109 	 * TLBs:
110 	 *
111 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
112 	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
113 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
114 	 * are flushed at any MI_FLUSH.
115 	 */
116 
117 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
118 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
119 		cmd &= ~MI_NO_WRITE_FLUSH;
120 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
121 		cmd |= MI_EXE_FLUSH;
122 
123 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
124 	    (IS_G4X(dev) || IS_GEN5(dev)))
125 		cmd |= MI_INVALIDATE_ISP;
126 
127 	ret = intel_ring_begin(ring, 2);
128 	if (ret)
129 		return ret;
130 
131 	intel_ring_emit(ring, cmd);
132 	intel_ring_emit(ring, MI_NOOP);
133 	intel_ring_advance(ring);
134 
135 	return 0;
136 }
137 
138 /**
139  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
140  * implementing two workarounds on gen6.  From section 1.4.7.1
141  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
142  *
143  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
144  * produced by non-pipelined state commands), software needs to first
145  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
146  * 0.
147  *
148  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
149  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
150  *
151  * And the workaround for these two requires this workaround first:
152  *
153  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
154  * BEFORE the pipe-control with a post-sync op and no write-cache
155  * flushes.
156  *
157  * And this last workaround is tricky because of the requirements on
158  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
159  * volume 2 part 1:
160  *
161  *     "1 of the following must also be set:
162  *      - Render Target Cache Flush Enable ([12] of DW1)
163  *      - Depth Cache Flush Enable ([0] of DW1)
164  *      - Stall at Pixel Scoreboard ([1] of DW1)
165  *      - Depth Stall ([13] of DW1)
166  *      - Post-Sync Operation ([13] of DW1)
167  *      - Notify Enable ([8] of DW1)"
168  *
169  * The cache flushes require the workaround flush that triggered this
170  * one, so we can't use it.  Depth stall would trigger the same.
171  * Post-sync nonzero is what triggered this second workaround, so we
172  * can't use that one either.  Notify enable is IRQs, which aren't
173  * really our business.  That leaves only stall at scoreboard.
174  */
175 static int
176 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
177 {
178 	u32 scratch_addr = ring->scratch.gtt_offset + 128;
179 	int ret;
180 
181 
182 	ret = intel_ring_begin(ring, 6);
183 	if (ret)
184 		return ret;
185 
186 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
187 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
188 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
189 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
190 	intel_ring_emit(ring, 0); /* low dword */
191 	intel_ring_emit(ring, 0); /* high dword */
192 	intel_ring_emit(ring, MI_NOOP);
193 	intel_ring_advance(ring);
194 
195 	ret = intel_ring_begin(ring, 6);
196 	if (ret)
197 		return ret;
198 
199 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
200 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
201 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
202 	intel_ring_emit(ring, 0);
203 	intel_ring_emit(ring, 0);
204 	intel_ring_emit(ring, MI_NOOP);
205 	intel_ring_advance(ring);
206 
207 	return 0;
208 }
209 
210 static int
211 gen6_render_ring_flush(struct intel_ring_buffer *ring,
212                          u32 invalidate_domains, u32 flush_domains)
213 {
214 	u32 flags = 0;
215 	u32 scratch_addr = ring->scratch.gtt_offset + 128;
216 	int ret;
217 
218 	/* Force SNB workarounds for PIPE_CONTROL flushes */
219 	ret = intel_emit_post_sync_nonzero_flush(ring);
220 	if (ret)
221 		return ret;
222 
223 	/* Just flush everything.  Experiments have shown that reducing the
224 	 * number of bits based on the write domains has little performance
225 	 * impact.
226 	 */
227 	if (flush_domains) {
228 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
230 		/*
231 		 * Ensure that any following seqno writes only happen
232 		 * when the render cache is indeed flushed.
233 		 */
234 		flags |= PIPE_CONTROL_CS_STALL;
235 	}
236 	if (invalidate_domains) {
237 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
238 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
239 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
240 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
241 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
242 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
243 		/*
244 		 * TLB invalidate requires a post-sync write.
245 		 */
246 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
247 	}
248 
249 	ret = intel_ring_begin(ring, 4);
250 	if (ret)
251 		return ret;
252 
253 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
254 	intel_ring_emit(ring, flags);
255 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
256 	intel_ring_emit(ring, 0);
257 	intel_ring_advance(ring);
258 
259 	return 0;
260 }
261 
262 static int
263 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
264 {
265 	int ret;
266 
267 	ret = intel_ring_begin(ring, 4);
268 	if (ret)
269 		return ret;
270 
271 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
272 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
273 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
274 	intel_ring_emit(ring, 0);
275 	intel_ring_emit(ring, 0);
276 	intel_ring_advance(ring);
277 
278 	return 0;
279 }
280 
281 static int gen7_ring_fbc_flush(struct intel_ring_buffer *ring, u32 value)
282 {
283 	int ret;
284 
285 	if (!ring->fbc_dirty)
286 		return 0;
287 
288 	ret = intel_ring_begin(ring, 6);
289 	if (ret)
290 		return ret;
291 	/* WaFbcNukeOn3DBlt:ivb/hsw */
292 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
293 	intel_ring_emit(ring, MSG_FBC_REND_STATE);
294 	intel_ring_emit(ring, value);
295 	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
296 	intel_ring_emit(ring, MSG_FBC_REND_STATE);
297 	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
298 	intel_ring_advance(ring);
299 
300 	ring->fbc_dirty = false;
301 	return 0;
302 }
303 
304 static int
305 gen7_render_ring_flush(struct intel_ring_buffer *ring,
306 		       u32 invalidate_domains, u32 flush_domains)
307 {
308 	u32 flags = 0;
309 	u32 scratch_addr = ring->scratch.gtt_offset + 128;
310 	int ret;
311 
312 	/*
313 	 * Ensure that any following seqno writes only happen when the render
314 	 * cache is indeed flushed.
315 	 *
316 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
317 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
318 	 * don't try to be clever and just set it unconditionally.
319 	 */
320 	flags |= PIPE_CONTROL_CS_STALL;
321 
322 	/* Just flush everything.  Experiments have shown that reducing the
323 	 * number of bits based on the write domains has little performance
324 	 * impact.
325 	 */
326 	if (flush_domains) {
327 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
328 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
329 	}
330 	if (invalidate_domains) {
331 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
332 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
333 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
334 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
335 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
336 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
337 		/*
338 		 * TLB invalidate requires a post-sync write.
339 		 */
340 		flags |= PIPE_CONTROL_QW_WRITE;
341 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
342 
343 		/* Workaround: we must issue a pipe_control with CS-stall bit
344 		 * set before a pipe_control command that has the state cache
345 		 * invalidate bit set. */
346 		gen7_render_ring_cs_stall_wa(ring);
347 	}
348 
349 	ret = intel_ring_begin(ring, 4);
350 	if (ret)
351 		return ret;
352 
353 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
354 	intel_ring_emit(ring, flags);
355 	intel_ring_emit(ring, scratch_addr);
356 	intel_ring_emit(ring, 0);
357 	intel_ring_advance(ring);
358 
359 	if (!invalidate_domains && flush_domains)
360 		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
361 
362 	return 0;
363 }
364 
365 static int
366 gen8_render_ring_flush(struct intel_ring_buffer *ring,
367 		       u32 invalidate_domains, u32 flush_domains)
368 {
369 	u32 flags = 0;
370 	u32 scratch_addr = ring->scratch.gtt_offset + 128;
371 	int ret;
372 
373 	flags |= PIPE_CONTROL_CS_STALL;
374 
375 	if (flush_domains) {
376 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
377 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
378 	}
379 	if (invalidate_domains) {
380 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
381 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
382 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
383 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
384 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
385 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
386 		flags |= PIPE_CONTROL_QW_WRITE;
387 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
388 	}
389 
390 	ret = intel_ring_begin(ring, 6);
391 	if (ret)
392 		return ret;
393 
394 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
395 	intel_ring_emit(ring, flags);
396 	intel_ring_emit(ring, scratch_addr);
397 	intel_ring_emit(ring, 0);
398 	intel_ring_emit(ring, 0);
399 	intel_ring_emit(ring, 0);
400 	intel_ring_advance(ring);
401 
402 	return 0;
403 
404 }
405 
406 static void ring_write_tail(struct intel_ring_buffer *ring,
407 			    u32 value)
408 {
409 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
410 	I915_WRITE_TAIL(ring, value);
411 }
412 
413 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
414 {
415 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
416 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
417 			RING_ACTHD(ring->mmio_base) : ACTHD;
418 
419 	return I915_READ(acthd_reg);
420 }
421 
422 static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
423 {
424 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
425 	u32 addr;
426 
427 	addr = dev_priv->status_page_dmah->busaddr;
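	/*
	 * gen4+ additionally folds bits [35:32] of the bus address into
	 * bits [7:4] of the value written to HWS_PGA, presumably so that
	 * status pages above 4GiB can be encoded; older parts only take
	 * the low 32 bits.
	 */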
428 	if (INTEL_INFO(ring->dev)->gen >= 4)
429 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
430 	I915_WRITE(HWS_PGA, addr);
431 }
432 
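/*
 * Bring a ring up from scratch: point the hardware at its status page,
 * stop and drain the ring, work around G45 parts that fail to reset HEAD,
 * then program START/CTL and verify that CTL, START and HEAD read back as
 * expected before handing the ring back to the driver.
 */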
433 static int init_ring_common(struct intel_ring_buffer *ring)
434 {
435 	struct drm_device *dev = ring->dev;
436 	drm_i915_private_t *dev_priv = dev->dev_private;
437 	struct drm_i915_gem_object *obj = ring->obj;
438 	int ret = 0;
439 	u32 head;
440 
441 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
442 
443 	if (I915_NEED_GFX_HWS(dev))
444 		intel_ring_setup_status_page(ring);
445 	else
446 		ring_setup_phys_status_page(ring);
447 
448 	/* Stop the ring if it's running. */
449 	I915_WRITE_CTL(ring, 0);
450 	I915_WRITE_HEAD(ring, 0);
451 	ring->write_tail(ring, 0);
452 
453 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
454 
455 	/* G45 ring initialization fails to reset head to zero */
456 	if (head != 0) {
457 		DRM_DEBUG_KMS("%s head not reset to zero "
458 			      "ctl %08x head %08x tail %08x start %08x\n",
459 			      ring->name,
460 			      I915_READ_CTL(ring),
461 			      I915_READ_HEAD(ring),
462 			      I915_READ_TAIL(ring),
463 			      I915_READ_START(ring));
464 
465 		I915_WRITE_HEAD(ring, 0);
466 
467 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
468 			DRM_ERROR("failed to set %s head to zero "
469 				  "ctl %08x head %08x tail %08x start %08x\n",
470 				  ring->name,
471 				  I915_READ_CTL(ring),
472 				  I915_READ_HEAD(ring),
473 				  I915_READ_TAIL(ring),
474 				  I915_READ_START(ring));
475 		}
476 	}
477 
478 	/* Initialize the ring. This must happen _after_ we've cleared the ring
479 	 * registers with the above sequence (the readback of the HEAD registers
480 	 * also enforces ordering), otherwise the hw might lose the new ring
481 	 * register values. */
482 	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
483 	I915_WRITE_CTL(ring,
484 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
485 			| RING_VALID);
486 
487 	/* If the head is still not zero, the ring is dead */
488 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
489 		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
490 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
491 		DRM_ERROR("%s initialization failed "
492 				"ctl %08x head %08x tail %08x start %08x\n",
493 				ring->name,
494 				I915_READ_CTL(ring),
495 				I915_READ_HEAD(ring),
496 				I915_READ_TAIL(ring),
497 				I915_READ_START(ring));
498 		ret = -EIO;
499 		goto out;
500 	}
501 
502 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
503 		i915_kernel_lost_context(ring->dev);
504 	else {
505 		ring->head = I915_READ_HEAD(ring);
506 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
507 		ring->space = ring_space(ring);
508 		ring->last_retired_head = -1;
509 	}
510 
511 	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
512 
513 out:
514 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
515 
516 	return ret;
517 }
518 
519 static int
520 init_pipe_control(struct intel_ring_buffer *ring)
521 {
522 	int ret;
523 
524 	if (ring->scratch.obj)
525 		return 0;
526 
527 	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
528 	if (ring->scratch.obj == NULL) {
529 		DRM_ERROR("Failed to allocate seqno page\n");
530 		ret = -ENOMEM;
531 		goto err;
532 	}
533 
534 	i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
535 
536 	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
537 	if (ret)
538 		goto err_unref;
539 
540 	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
541 	ring->scratch.cpu_page = kmap(ring->scratch.obj->pages[0]);
542 	if (ring->scratch.cpu_page == NULL) {
543 		ret = -ENOMEM;
544 		goto err_unpin;
545 	}
546 
547 	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
548 			 ring->name, ring->scratch.gtt_offset);
549 	return 0;
550 
551 err_unpin:
552 	i915_gem_object_unpin(ring->scratch.obj);
553 err_unref:
554 	drm_gem_object_unreference(&ring->scratch.obj->base);
555 err:
556 	return ret;
557 }
558 
559 static int init_render_ring(struct intel_ring_buffer *ring)
560 {
561 	struct drm_device *dev = ring->dev;
562 	struct drm_i915_private *dev_priv = dev->dev_private;
563 	int ret = init_ring_common(ring);
564 
565 	if (INTEL_INFO(dev)->gen > 3)
566 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
567 
568 	/* We need to disable the AsyncFlip performance optimisations in order
569 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
570 	 * programmed to '1' on all products.
571 	 *
572 	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
573 	 */
574 	if (INTEL_INFO(dev)->gen >= 6)
575 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
576 
577 	/* Required for the hardware to program scanline values for waiting */
578 	if (INTEL_INFO(dev)->gen == 6)
579 		I915_WRITE(GFX_MODE,
580 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
581 
582 	if (IS_GEN7(dev))
583 		I915_WRITE(GFX_MODE_GEN7,
584 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
585 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
586 
587 	if (INTEL_INFO(dev)->gen >= 5) {
588 		ret = init_pipe_control(ring);
589 		if (ret)
590 			return ret;
591 	}
592 
593 	if (IS_GEN6(dev)) {
594 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
595 		 * "If this bit is set, STCunit will have LRA as replacement
596 		 *  policy. [...] This bit must be reset.  LRA replacement
597 		 *  policy is not supported."
598 		 */
599 		I915_WRITE(CACHE_MODE_0,
600 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
601 
602 		/* This is not explicitly set for GEN6, so read the register.
603 		 * see intel_ring_mi_set_context() for why we care.
604 		 * TODO: consider explicitly setting the bit for GEN5
605 		 */
606 		ring->itlb_before_ctx_switch =
607 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
608 	}
609 
610 	if (INTEL_INFO(dev)->gen >= 6)
611 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
612 
613 	if (HAS_L3_DPF(dev))
614 		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
615 
616 	return ret;
617 }
618 
619 static void render_ring_cleanup(struct intel_ring_buffer *ring)
620 {
621 	struct drm_device *dev = ring->dev;
622 
623 	if (ring->scratch.obj == NULL)
624 		return;
625 
626 	if (INTEL_INFO(dev)->gen >= 5) {
627 		kunmap(ring->scratch.obj->pages[0]);
628 		i915_gem_object_unpin(ring->scratch.obj);
629 	}
630 
631 	drm_gem_object_unreference(&ring->scratch.obj->base);
632 	ring->scratch.obj = NULL;
633 }
634 
635 static void
636 update_mboxes(struct intel_ring_buffer *ring,
637 	      u32 mmio_offset)
638 {
639 /* NB: In order to be able to do semaphore MBOX updates for varying number
640  * of rings, it's easiest if we round up each individual update to a
641  * multiple of 2 (since ring updates must always be a multiple of 2)
642  * even though the actual update only requires 3 dwords.
643  */
644 #define MBOX_UPDATE_DWORDS 4
645 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
646 	intel_ring_emit(ring, mmio_offset);
647 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
648 	intel_ring_emit(ring, MI_NOOP);
649 }
650 
651 /**
652  * gen6_add_request - Update the semaphore mailbox registers
653  *
654  * @ring - ring that is adding a request
655  * @seqno - return seqno stuck into the ring
656  *
657  * Update the mailbox registers in the *other* rings with the current seqno.
658  * This acts like a signal in the canonical semaphore.
659  */
660 static int
661 gen6_add_request(struct intel_ring_buffer *ring)
662 {
663 	struct drm_device *dev = ring->dev;
664 	struct drm_i915_private *dev_priv = dev->dev_private;
665 	struct intel_ring_buffer *useless;
666 	int i, ret, num_dwords = 4;
667 
668 	if (i915_semaphore_is_enabled(dev))
669 		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS);
670 #undef MBOX_UPDATE_DWORDS
671 
672 	ret = intel_ring_begin(ring, num_dwords);
673 	if (ret)
674 		return ret;
675 
676 	if (i915_semaphore_is_enabled(dev)) {
677 		for_each_ring(useless, dev_priv, i) {
678 			u32 mbox_reg = ring->signal_mbox[i];
679 			if (mbox_reg != GEN6_NOSYNC)
680 				update_mboxes(ring, mbox_reg);
681 		}
682 	}
683 
684 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
685 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
686 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
687 	intel_ring_emit(ring, MI_USER_INTERRUPT);
688 	__intel_ring_advance(ring);
689 
690 	return 0;
691 }
692 
693 static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
694 					      u32 seqno)
695 {
696 	struct drm_i915_private *dev_priv = dev->dev_private;
697 	return dev_priv->last_seqno < seqno;
698 }
699 
700 /**
701  * gen6_ring_sync - sync the waiter to the signaller on seqno
702  *
703  * @waiter - ring that is waiting
704  * @signaller - ring which has, or will signal
705  * @seqno - seqno which the waiter will block on
706  */
707 static int
708 gen6_ring_sync(struct intel_ring_buffer *waiter,
709 	       struct intel_ring_buffer *signaller,
710 	       u32 seqno)
711 {
712 	int ret;
713 	u32 dw1 = MI_SEMAPHORE_MBOX |
714 		  MI_SEMAPHORE_COMPARE |
715 		  MI_SEMAPHORE_REGISTER;
716 
717 	/* Throughout all of the GEM code, seqno passed implies our current
718 	 * seqno is >= the last seqno executed. However for hardware the
719 	 * comparison is strictly greater than.
720 	 */
721 	seqno -= 1;
722 
723 	WARN_ON(signaller->semaphore_register[waiter->id] ==
724 		MI_SEMAPHORE_SYNC_INVALID);
725 
726 	ret = intel_ring_begin(waiter, 4);
727 	if (ret)
728 		return ret;
729 
730 	/* If seqno wrap happened, omit the wait with no-ops */
731 	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
732 		intel_ring_emit(waiter,
733 				dw1 |
734 				signaller->semaphore_register[waiter->id]);
735 		intel_ring_emit(waiter, seqno);
736 		intel_ring_emit(waiter, 0);
737 		intel_ring_emit(waiter, MI_NOOP);
738 	} else {
739 		intel_ring_emit(waiter, MI_NOOP);
740 		intel_ring_emit(waiter, MI_NOOP);
741 		intel_ring_emit(waiter, MI_NOOP);
742 		intel_ring_emit(waiter, MI_NOOP);
743 	}
744 	intel_ring_advance(waiter);
745 
746 	return 0;
747 }
748 
749 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
750 do {									\
751 	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
752 		 PIPE_CONTROL_DEPTH_STALL);				\
753 	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
754 	intel_ring_emit(ring__, 0);							\
755 	intel_ring_emit(ring__, 0);							\
756 } while (0)
757 
758 static int
759 pc_render_add_request(struct intel_ring_buffer *ring)
760 {
761 	u32 scratch_addr = ring->scratch.gtt_offset + 128;
762 	int ret;
763 
764 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
765 	 * incoherent with writes to memory, i.e. completely fubar,
766 	 * so we need to use PIPE_NOTIFY instead.
767 	 *
768 	 * However, we also need to workaround the qword write
769 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
770 	 * memory before requesting an interrupt.
771 	 */
772 	ret = intel_ring_begin(ring, 32);
773 	if (ret)
774 		return ret;
775 
776 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
777 			PIPE_CONTROL_WRITE_FLUSH |
778 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
779 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
780 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
781 	intel_ring_emit(ring, 0);
782 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
783 	scratch_addr += 128; /* write to separate cachelines */
784 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
785 	scratch_addr += 128;
786 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
787 	scratch_addr += 128;
788 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
789 	scratch_addr += 128;
790 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
791 	scratch_addr += 128;
792 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
793 
794 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
795 			PIPE_CONTROL_WRITE_FLUSH |
796 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
797 			PIPE_CONTROL_NOTIFY);
798 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
799 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
800 	intel_ring_emit(ring, 0);
801 	__intel_ring_advance(ring);
802 
803 	return 0;
804 }
805 
806 static u32
807 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
808 {
809 	/* Workaround to force correct ordering between irq and seqno writes on
810 	 * ivb (and maybe also on snb) by reading from a CS register (like
811 	 * ACTHD) before reading the status page. */
812 	if (!lazy_coherency)
813 		intel_ring_get_active_head(ring);
814 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
815 }
816 
817 static u32
818 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
819 {
820 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
821 }
822 
823 static void
824 ring_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
825 {
826 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
827 }
828 
829 static u32
830 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
831 {
832 	return ring->scratch.cpu_page[0];
833 }
834 
835 static void
836 pc_render_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
837 {
838 	ring->scratch.cpu_page[0] = seqno;
839 }
840 
841 static bool
842 gen5_ring_get_irq(struct intel_ring_buffer *ring)
843 {
844 	struct drm_device *dev = ring->dev;
845 	drm_i915_private_t *dev_priv = dev->dev_private;
846 
847 	if (!dev->irq_enabled)
848 		return false;
849 
850 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
851 	if (ring->irq_refcount++ == 0)
852 		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
853 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
854 
855 	return true;
856 }
857 
858 static void
859 gen5_ring_put_irq(struct intel_ring_buffer *ring)
860 {
861 	struct drm_device *dev = ring->dev;
862 	drm_i915_private_t *dev_priv = dev->dev_private;
863 
864 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
865 	if (--ring->irq_refcount == 0)
866 		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
867 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
868 }
869 
870 static bool
871 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
872 {
873 	struct drm_device *dev = ring->dev;
874 	drm_i915_private_t *dev_priv = dev->dev_private;
875 
876 	if (!dev->irq_enabled)
877 		return false;
878 
879 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
880 	if (ring->irq_refcount++ == 0) {
881 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
882 		I915_WRITE(IMR, dev_priv->irq_mask);
883 		POSTING_READ(IMR);
884 	}
885 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
886 
887 	return true;
888 }
889 
890 static void
891 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
892 {
893 	struct drm_device *dev = ring->dev;
894 	drm_i915_private_t *dev_priv = dev->dev_private;
895 
896 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
897 	if (--ring->irq_refcount == 0) {
898 		dev_priv->irq_mask |= ring->irq_enable_mask;
899 		I915_WRITE(IMR, dev_priv->irq_mask);
900 		POSTING_READ(IMR);
901 	}
902 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
903 }
904 
905 static bool
906 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
907 {
908 	struct drm_device *dev = ring->dev;
909 	drm_i915_private_t *dev_priv = dev->dev_private;
910 
911 	if (!dev->irq_enabled)
912 		return false;
913 
914 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
915 	if (ring->irq_refcount++ == 0) {
916 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
917 		I915_WRITE16(IMR, dev_priv->irq_mask);
918 		POSTING_READ16(IMR);
919 	}
920 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
921 
922 	return true;
923 }
924 
925 static void
926 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
927 {
928 	struct drm_device *dev = ring->dev;
929 	drm_i915_private_t *dev_priv = dev->dev_private;
930 
931 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
932 	if (--ring->irq_refcount == 0) {
933 		dev_priv->irq_mask |= ring->irq_enable_mask;
934 		I915_WRITE16(IMR, dev_priv->irq_mask);
935 		POSTING_READ16(IMR);
936 	}
937 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
938 }
939 
940 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
941 {
942 	struct drm_device *dev = ring->dev;
943 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
944 	u32 mmio = 0;
945 
946 	/* The ring status page addresses are no longer next to the rest of
947 	 * the ring registers as of gen7.
948 	 */
949 	if (IS_GEN7(dev)) {
950 		switch (ring->id) {
951 		case RCS:
952 			mmio = RENDER_HWS_PGA_GEN7;
953 			break;
954 		case BCS:
955 			mmio = BLT_HWS_PGA_GEN7;
956 			break;
957 		case VCS:
958 			mmio = BSD_HWS_PGA_GEN7;
959 			break;
960 		case VECS:
961 			mmio = VEBOX_HWS_PGA_GEN7;
962 			break;
963 		}
964 	} else if (IS_GEN6(ring->dev)) {
965 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
966 	} else {
967 		/* XXX: gen8 returns to sanity */
968 		mmio = RING_HWS_PGA(ring->mmio_base);
969 	}
970 
971 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
972 	POSTING_READ(mmio);
973 
974 	/* Flush the TLB for this page */
975 	if (INTEL_INFO(dev)->gen >= 6) {
976 		u32 reg = RING_INSTPM(ring->mmio_base);
977 		I915_WRITE(reg,
978 			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
979 					      INSTPM_SYNC_FLUSH));
980 		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
981 			     1000))
982 			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
983 				  ring->name);
984 	}
985 }
986 
987 static int
988 bsd_ring_flush(struct intel_ring_buffer *ring,
989 	       u32     invalidate_domains,
990 	       u32     flush_domains)
991 {
992 	int ret;
993 
994 	ret = intel_ring_begin(ring, 2);
995 	if (ret)
996 		return ret;
997 
998 	intel_ring_emit(ring, MI_FLUSH);
999 	intel_ring_emit(ring, MI_NOOP);
1000 	intel_ring_advance(ring);
1001 	return 0;
1002 }
1003 
1004 static int
1005 i9xx_add_request(struct intel_ring_buffer *ring)
1006 {
1007 	int ret;
1008 
1009 	ret = intel_ring_begin(ring, 4);
1010 	if (ret)
1011 		return ret;
1012 
1013 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1014 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1015 	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
1016 	intel_ring_emit(ring, MI_USER_INTERRUPT);
1017 	__intel_ring_advance(ring);
1018 
1019 	return 0;
1020 }
1021 
1022 static bool
1023 gen6_ring_get_irq(struct intel_ring_buffer *ring)
1024 {
1025 	struct drm_device *dev = ring->dev;
1026 	drm_i915_private_t *dev_priv = dev->dev_private;
1027 
1028 	if (!dev->irq_enabled)
1029 		return false;
1030 
1031 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1032 	if (ring->irq_refcount++ == 0) {
1033 		if (HAS_L3_DPF(dev) && ring->id == RCS)
1034 			I915_WRITE_IMR(ring,
1035 				       ~(ring->irq_enable_mask |
1036 					 GT_PARITY_ERROR(dev)));
1037 		else
1038 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1039 		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
1040 	}
1041 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1042 
1043 	return true;
1044 }
1045 
1046 static void
1047 gen6_ring_put_irq(struct intel_ring_buffer *ring)
1048 {
1049 	struct drm_device *dev = ring->dev;
1050 	drm_i915_private_t *dev_priv = dev->dev_private;
1051 
1052 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1053 	if (--ring->irq_refcount == 0) {
1054 		if (HAS_L3_DPF(dev) && ring->id == RCS)
1055 			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
1056 		else
1057 			I915_WRITE_IMR(ring, ~0);
1058 		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1059 	}
1060 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1061 }
1062 
1063 static bool
1064 hsw_vebox_get_irq(struct intel_ring_buffer *ring)
1065 {
1066 	struct drm_device *dev = ring->dev;
1067 	struct drm_i915_private *dev_priv = dev->dev_private;
1068 
1069 	if (!dev->irq_enabled)
1070 		return false;
1071 
1072 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1073 	if (ring->irq_refcount++ == 0) {
1074 		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1075 		snb_enable_pm_irq(dev_priv, ring->irq_enable_mask);
1076 	}
1077 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1078 
1079 	return true;
1080 }
1081 
1082 static void
1083 hsw_vebox_put_irq(struct intel_ring_buffer *ring)
1084 {
1085 	struct drm_device *dev = ring->dev;
1086 	struct drm_i915_private *dev_priv = dev->dev_private;
1087 
1088 	if (!dev->irq_enabled)
1089 		return;
1090 
1091 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1092 	if (--ring->irq_refcount == 0) {
1093 		I915_WRITE_IMR(ring, ~0);
1094 		snb_disable_pm_irq(dev_priv, ring->irq_enable_mask);
1095 	}
1096 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1097 }
1098 
1099 static bool
1100 gen8_ring_get_irq(struct intel_ring_buffer *ring)
1101 {
1102 	struct drm_device *dev = ring->dev;
1103 	struct drm_i915_private *dev_priv = dev->dev_private;
1104 
1105 	if (!dev->irq_enabled)
1106 		return false;
1107 
1108 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1109 	if (ring->irq_refcount++ == 0) {
1110 		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1111 			I915_WRITE_IMR(ring,
1112 				       ~(ring->irq_enable_mask |
1113 					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1114 		} else {
1115 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1116 		}
1117 		POSTING_READ(RING_IMR(ring->mmio_base));
1118 	}
1119 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1120 
1121 	return true;
1122 }
1123 
1124 static void
1125 gen8_ring_put_irq(struct intel_ring_buffer *ring)
1126 {
1127 	struct drm_device *dev = ring->dev;
1128 	struct drm_i915_private *dev_priv = dev->dev_private;
1129 
1130 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
1131 	if (--ring->irq_refcount == 0) {
1132 		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1133 			I915_WRITE_IMR(ring,
1134 				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1135 		} else {
1136 			I915_WRITE_IMR(ring, ~0);
1137 		}
1138 		POSTING_READ(RING_IMR(ring->mmio_base));
1139 	}
1140 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
1141 }
1142 
1143 static int
1144 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
1145 			 u32 offset, u32 length,
1146 			 unsigned flags)
1147 {
1148 	int ret;
1149 
1150 	ret = intel_ring_begin(ring, 2);
1151 	if (ret)
1152 		return ret;
1153 
1154 	intel_ring_emit(ring,
1155 			MI_BATCH_BUFFER_START |
1156 			MI_BATCH_GTT |
1157 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1158 	intel_ring_emit(ring, offset);
1159 	intel_ring_advance(ring);
1160 
1161 	return 0;
1162 }
1163 
1164 /* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1165 #define I830_BATCH_LIMIT (256*1024)
1166 static int
1167 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
1168 				u32 offset, u32 len,
1169 				unsigned flags)
1170 {
1171 	int ret;
1172 
1173 	if (flags & I915_DISPATCH_PINNED) {
1174 		ret = intel_ring_begin(ring, 4);
1175 		if (ret)
1176 			return ret;
1177 
1178 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1179 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1180 		intel_ring_emit(ring, offset + len - 8);
1181 		intel_ring_emit(ring, MI_NOOP);
1182 		intel_ring_advance(ring);
1183 	} else {
1184 		u32 cs_offset = ring->scratch.gtt_offset;
1185 
1186 		if (len > I830_BATCH_LIMIT)
1187 			return -ENOSPC;
1188 
1189 		ret = intel_ring_begin(ring, 9+3);
1190 		if (ret)
1191 			return ret;
1192 		/* Blit the batch (which now has all relocs applied) to the stable batch
1193 		 * scratch bo area (so that the CS never stumbles over its tlb
1194 		 * invalidation bug) ... */
1195 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1196 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1197 				XY_SRC_COPY_BLT_WRITE_RGB);
1198 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1199 		intel_ring_emit(ring, 0);
1200 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1201 		intel_ring_emit(ring, cs_offset);
1202 		intel_ring_emit(ring, 0);
1203 		intel_ring_emit(ring, 4096);
1204 		intel_ring_emit(ring, offset);
1205 		intel_ring_emit(ring, MI_FLUSH);
1206 
1207 		/* ... and execute it. */
1208 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1209 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1210 		intel_ring_emit(ring, cs_offset + len - 8);
1211 		intel_ring_advance(ring);
1212 	}
1213 
1214 	return 0;
1215 }
1216 
1217 static int
1218 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1219 			 u32 offset, u32 len,
1220 			 unsigned flags)
1221 {
1222 	int ret;
1223 
1224 	ret = intel_ring_begin(ring, 2);
1225 	if (ret)
1226 		return ret;
1227 
1228 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1229 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1230 	intel_ring_advance(ring);
1231 
1232 	return 0;
1233 }
1234 
1235 static void cleanup_status_page(struct intel_ring_buffer *ring)
1236 {
1237 	struct drm_i915_gem_object *obj;
1238 
1239 	obj = ring->status_page.obj;
1240 	if (obj == NULL)
1241 		return;
1242 
1243 	kunmap(obj->pages[0]);
1244 	i915_gem_object_unpin(obj);
1245 	drm_gem_object_unreference(&obj->base);
1246 	ring->status_page.obj = NULL;
1247 }
1248 
1249 static int init_status_page(struct intel_ring_buffer *ring)
1250 {
1251 	struct drm_device *dev = ring->dev;
1252 	struct drm_i915_gem_object *obj;
1253 	int ret;
1254 
1255 	obj = i915_gem_alloc_object(dev, 4096);
1256 	if (obj == NULL) {
1257 		DRM_ERROR("Failed to allocate status page\n");
1258 		ret = -ENOMEM;
1259 		goto err;
1260 	}
1261 
1262 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1263 
1264 	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
1265 	if (ret != 0) {
1266 		goto err_unref;
1267 	}
1268 
1269 	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
1270 	ring->status_page.page_addr = kmap(obj->pages[0]);
1271 	if (ring->status_page.page_addr == NULL) {
1272 		ret = -ENOMEM;
1273 		goto err_unpin;
1274 	}
1275 	ring->status_page.obj = obj;
1276 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1277 
1278 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1279 			ring->name, ring->status_page.gfx_addr);
1280 
1281 	return 0;
1282 
1283 err_unpin:
1284 	i915_gem_object_unpin(obj);
1285 err_unref:
1286 	drm_gem_object_unreference(&obj->base);
1287 err:
1288 	return ret;
1289 }
1290 
1291 static int init_phys_status_page(struct intel_ring_buffer *ring)
1292 {
1293 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1294 
1295 	if (!dev_priv->status_page_dmah) {
1296 		dev_priv->status_page_dmah =
1297 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1298 		if (!dev_priv->status_page_dmah)
1299 			return -ENOMEM;
1300 	}
1301 
1302 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1303 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1304 
1305 	return 0;
1306 }
1307 
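/*
 * Common ring construction: set up the status page (GTT-based or physical),
 * allocate the 32-page ring object (preferring stolen memory on parts
 * without an LLC), pin and map it write-combined, run the ring-specific
 * init hook, and trim effective_size to dodge the i830/845G tail erratum.
 */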
1308 static int intel_init_ring_buffer(struct drm_device *dev,
1309 				  struct intel_ring_buffer *ring)
1310 {
1311 	struct drm_i915_gem_object *obj;
1312 	int ret;
1313 
1314 	ring->dev = dev;
1315 	INIT_LIST_HEAD(&ring->active_list);
1316 	INIT_LIST_HEAD(&ring->request_list);
1317 	ring->size = 32 * PAGE_SIZE;
1318 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1319 
1320 	init_waitqueue_head(&ring->irq_queue);
1321 
1322 	if (I915_NEED_GFX_HWS(dev)) {
1323 		ret = init_status_page(ring);
1324 		if (ret)
1325 			return ret;
1326 	} else {
1327 		BUG_ON(ring->id != RCS);
1328 		ret = init_phys_status_page(ring);
1329 		if (ret)
1330 			return ret;
1331 	}
1332 
1333 	obj = NULL;
1334 	if (!HAS_LLC(dev))
1335 		obj = i915_gem_object_create_stolen(dev, ring->size);
1336 	if (obj == NULL)
1337 		obj = i915_gem_alloc_object(dev, ring->size);
1338 	if (obj == NULL) {
1339 		DRM_ERROR("Failed to allocate ringbuffer\n");
1340 		ret = -ENOMEM;
1341 		goto err_hws;
1342 	}
1343 
1344 	ring->obj = obj;
1345 
1346 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
1347 	if (ret)
1348 		goto err_unref;
1349 
1350 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1351 	if (ret)
1352 		goto err_unpin;
1353 
1354 	ring->virtual_start =
1355 		ioremap_wc(dev->agp->base + i915_gem_obj_ggtt_offset(obj),
1356 			   ring->size);
1357 	if (ring->virtual_start == NULL) {
1358 		DRM_ERROR("Failed to map ringbuffer.\n");
1359 		ret = -EINVAL;
1360 		goto err_unpin;
1361 	}
1362 
1363 	ret = ring->init(ring);
1364 	if (ret)
1365 		goto err_unmap;
1366 
1367 	/* Workaround an erratum on the i830 which causes a hang if
1368 	 * the TAIL pointer points to within the last 2 cachelines
1369 	 * of the buffer.
1370 	 */
1371 	ring->effective_size = ring->size;
1372 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1373 		ring->effective_size -= 128;
1374 
1375 	return 0;
1376 
1377 err_unmap:
1378 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1379 err_unpin:
1380 	i915_gem_object_unpin(obj);
1381 err_unref:
1382 	drm_gem_object_unreference(&obj->base);
1383 	ring->obj = NULL;
1384 err_hws:
1385 	cleanup_status_page(ring);
1386 	return ret;
1387 }
1388 
1389 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1390 {
1391 	struct drm_i915_private *dev_priv;
1392 	int ret;
1393 
1394 	if (ring->obj == NULL)
1395 		return;
1396 
1397 	/* Disable the ring buffer. The ring must be idle at this point */
1398 	dev_priv = ring->dev->dev_private;
1399 	ret = intel_ring_idle(ring);
1400 	if (ret && !i915_reset_in_progress(&dev_priv->gpu_error))
1401 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1402 			  ring->name, ret);
1403 
1404 	I915_WRITE_CTL(ring, 0);
1405 
1406 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1407 
1408 	i915_gem_object_unpin(ring->obj);
1409 	drm_gem_object_unreference(&ring->obj->base);
1410 	ring->obj = NULL;
1411 	ring->preallocated_lazy_request = NULL;
1412 	ring->outstanding_lazy_seqno = 0;
1413 
1414 	if (ring->cleanup)
1415 		ring->cleanup(ring);
1416 
1417 	cleanup_status_page(ring);
1418 }
1419 
1420 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1421 {
1422 	int ret;
1423 
1424 	ret = i915_wait_seqno(ring, seqno);
1425 	if (!ret)
1426 		i915_gem_retire_requests_ring(ring);
1427 
1428 	return ret;
1429 }
1430 
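/*
 * Try to free ring space without polling the hardware: walk the pending
 * requests oldest-first, wait on the seqno of the first request whose
 * retirement would leave at least n bytes free, then adopt the head
 * position recorded by the retire code (last_retired_head).
 */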
1431 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1432 {
1433 	struct drm_i915_gem_request *request;
1434 	u32 seqno = 0;
1435 	int ret;
1436 
1437 	i915_gem_retire_requests_ring(ring);
1438 
1439 	if (ring->last_retired_head != -1) {
1440 		ring->head = ring->last_retired_head;
1441 		ring->last_retired_head = -1;
1442 		ring->space = ring_space(ring);
1443 		if (ring->space >= n)
1444 			return 0;
1445 	}
1446 
1447 	list_for_each_entry(request, &ring->request_list, list) {
1448 		int space;
1449 
1450 		if (request->tail == -1)
1451 			continue;
1452 
1453 		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1454 		if (space < 0)
1455 			space += ring->size;
1456 		if (space >= n) {
1457 			seqno = request->seqno;
1458 			break;
1459 		}
1460 
1461 		/* Consume this request in case we need more space than
1462 		 * is available and so need to prevent a race between
1463 		 * updating last_retired_head and direct reads of
1464 		 * I915_RING_HEAD. It also provides a nice sanity check.
1465 		 */
1466 		request->tail = -1;
1467 	}
1468 
1469 	if (seqno == 0)
1470 		return -ENOSPC;
1471 
1472 	ret = intel_ring_wait_seqno(ring, seqno);
1473 	if (ret)
1474 		return ret;
1475 
1476 	if (WARN_ON(ring->last_retired_head == -1))
1477 		return -ENOSPC;
1478 
1479 	ring->head = ring->last_retired_head;
1480 	ring->last_retired_head = -1;
1481 	ring->space = ring_space(ring);
1482 	if (WARN_ON(ring->space < n))
1483 		return -ENOSPC;
1484 
1485 	return 0;
1486 }
1487 
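/*
 * Slow path for reserving ring space: first try to make room by retiring
 * completed requests; failing that, poll the hardware HEAD register until
 * enough of the ring has been consumed, giving up if the GPU wedges or the
 * 60 second timeout expires.
 */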
1488 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1489 {
1490 	struct drm_device *dev = ring->dev;
1491 	struct drm_i915_private *dev_priv = dev->dev_private;
1492 	unsigned long end;
1493 	int ret;
1494 
1495 	ret = intel_ring_wait_request(ring, n);
1496 	if (ret != -ENOSPC)
1497 		return ret;
1498 
1499 	/* force the tail write in case we have been skipping them */
1500 	__intel_ring_advance(ring);
1501 
1502 	trace_i915_ring_wait_begin(ring);
1503 	/* With GEM the hangcheck timer should kick us out of the loop;
1504 	 * leaving it early runs the risk of corrupting GEM state (due
1505 	 * to running on almost untested codepaths). But on resume
1506 	 * timers don't work yet, so prevent a complete hang in that
1507 	 * case by choosing an insanely large timeout. */
1508 	end = jiffies + 60 * HZ;
1509 
1510 	do {
1511 		ring->head = I915_READ_HEAD(ring);
1512 		ring->space = ring_space(ring);
1513 		if (ring->space >= n) {
1514 			trace_i915_ring_wait_end(ring);
1515 			return 0;
1516 		}
1517 
1518 #if 0
1519 		if (dev->primary->master) {
1520 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1521 			if (master_priv->sarea_priv)
1522 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1523 		}
1524 #else
1525 		if (dev_priv->sarea_priv)
1526 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1527 #endif
1528 
1529 		msleep(1);
1530 
1531 		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1532 					   dev_priv->mm.interruptible);
1533 		if (ret)
1534 			return ret;
1535 	} while (!time_after(jiffies, end));
1536 	trace_i915_ring_wait_end(ring);
1537 	return -EBUSY;
1538 }
1539 
1540 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1541 {
1542 	uint32_t __iomem *virt;
1543 	int rem = ring->size - ring->tail;
1544 
1545 	if (ring->space < rem) {
1546 		int ret = ring_wait_for_space(ring, rem);
1547 		if (ret)
1548 			return ret;
1549 	}
1550 
1551 	virt = (uint32_t __iomem *)((char *)ring->virtual_start + ring->tail);
1552 	rem /= 4;
1553 	while (rem--)
1554 		iowrite32(MI_NOOP, virt++);
1555 
1556 	ring->tail = 0;
1557 	ring->space = ring_space(ring);
1558 
1559 	return 0;
1560 }
1561 
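/*
 * Drain the ring: flush any outstanding lazy request out to the hardware,
 * then wait for the seqno of the newest request on this ring's list.
 */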
1562 int intel_ring_idle(struct intel_ring_buffer *ring)
1563 {
1564 	u32 seqno;
1565 	int ret;
1566 
1567 	/* We need to add any requests required to flush the objects and ring */
1568 	if (ring->outstanding_lazy_seqno) {
1569 		ret = i915_add_request(ring, NULL);
1570 		if (ret)
1571 			return ret;
1572 	}
1573 
1574 	/* Wait upon the last request to be completed */
1575 	if (list_empty(&ring->request_list))
1576 		return 0;
1577 
1578 	seqno = list_entry(ring->request_list.prev,
1579 			   struct drm_i915_gem_request,
1580 			   list)->seqno;
1581 
1582 	return i915_wait_seqno(ring, seqno);
1583 }
1584 
1585 static int
1586 intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1587 {
1588 	if (ring->outstanding_lazy_seqno)
1589 		return 0;
1590 
1591 	if (ring->preallocated_lazy_request == NULL) {
1592 		struct drm_i915_gem_request *request;
1593 
1594 		request = kmalloc(sizeof(*request), M_DRM, M_WAITOK);
1595 		if (request == NULL)
1596 			return -ENOMEM;
1597 
1598 		ring->preallocated_lazy_request = request;
1599 	}
1600 
1601 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
1602 }
1603 
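/*
 * Make room for 'bytes' of new commands: wrap back to the start of the
 * buffer (padding the remainder with MI_NOOPs) if the write would run past
 * effective_size, then wait for the GPU to free up enough space if the
 * ring is still too full.
 */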
1604 static int __intel_ring_prepare(struct intel_ring_buffer *ring,
1605 				int bytes)
1606 {
1607 	int ret;
1608 
1609 	if (unlikely(ring->tail + bytes > ring->effective_size)) {
1610 		ret = intel_wrap_ring_buffer(ring);
1611 		if (unlikely(ret))
1612 			return ret;
1613 	}
1614 
1615 	if (unlikely(ring->space < bytes)) {
1616 		ret = ring_wait_for_space(ring, bytes);
1617 		if (unlikely(ret))
1618 			return ret;
1619 	}
1620 
1621 	return 0;
1622 }
1623 
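/*
 * Reserve num_dwords of ring space for the caller: bail out if the GPU is
 * wedged, wrap and/or wait for space as required, and preallocate the lazy
 * request up front so that emitting the commands cannot later fail for
 * lack of memory.
 */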
1624 int intel_ring_begin(struct intel_ring_buffer *ring,
1625 		     int num_dwords)
1626 {
1627 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1628 	int ret;
1629 
1630 	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1631 				   dev_priv->mm.interruptible);
1632 	if (ret)
1633 		return ret;
1634 
1635 	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
1636 	if (ret)
1637 		return ret;
1638 
1639 	/* Preallocate the olr before touching the ring */
1640 	ret = intel_ring_alloc_seqno(ring);
1641 	if (ret)
1642 		return ret;
1643 
1644 	ring->space -= num_dwords * sizeof(uint32_t);
1645 	return 0;
1646 }
1647 
1648 /* Align the ring tail to a cacheline boundary */
1649 int intel_ring_cacheline_align(struct intel_ring_buffer *ring)
1650 {
1651 	int num_dwords = (64 - (ring->tail & 63)) / sizeof(uint32_t);
1652 	int ret;
1653 
1654 	if (num_dwords == 0)
1655 		return 0;
1656 
1657 	ret = intel_ring_begin(ring, num_dwords);
1658 	if (ret)
1659 		return ret;
1660 
1661 	while (num_dwords--)
1662 		intel_ring_emit(ring, MI_NOOP);
1663 
1664 	intel_ring_advance(ring);
1665 
1666 	return 0;
1667 }
1668 
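/*
 * Re-seed the ring's notion of the current seqno (used when the driver
 * resets the seqno space): zero the gen6+ semaphore mailbox sync registers,
 * presumably so stale values from the old seqno space cannot satisfy or
 * block cross-ring waits, then write the new value to the status page and
 * the hangcheck state.
 */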
1669 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno)
1670 {
1671 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1672 
1673 	BUG_ON(ring->outstanding_lazy_seqno);
1674 
1675 	if (INTEL_INFO(ring->dev)->gen >= 6) {
1676 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
1677 		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
1678 		if (HAS_VEBOX(ring->dev))
1679 			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
1680 	}
1681 
1682 	ring->set_seqno(ring, seqno);
1683 	ring->hangcheck.seqno = seqno;
1684 }
1685 
1686 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
1687 				     u32 value)
1688 {
1689 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1690 
1691 	/* Every tail move must follow the sequence below */
1692 
1693 	/* Disable notification that the ring is IDLE. The GT
1694 	 * will then assume that it is busy and bring it out of rc6.
1695 	 */
1696 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1697 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1698 
1699 	/* Clear the context id. Here be magic! */
1700 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
1701 
1702 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
1703 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
1704 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
1705 		     50))
1706 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
1707 
1708 	/* Now that the ring is fully powered up, update the tail */
1709 	I915_WRITE_TAIL(ring, value);
1710 	POSTING_READ(RING_TAIL(ring->mmio_base));
1711 
1712 	/* Let the ring send IDLE messages to the GT again,
1713 	 * and so let it sleep to conserve power when idle.
1714 	 */
1715 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
1716 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
1717 }
1718 
1719 static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring,
1720 			       u32 invalidate, u32 flush)
1721 {
1722 	uint32_t cmd;
1723 	int ret;
1724 
1725 	ret = intel_ring_begin(ring, 4);
1726 	if (ret)
1727 		return ret;
1728 
1729 	cmd = MI_FLUSH_DW;
1730 	if (INTEL_INFO(ring->dev)->gen >= 8)
1731 		cmd += 1;
1732 	/*
1733 	 * Bspec vol 1c.5 - video engine command streamer:
1734 	 * "If ENABLED, all TLBs will be invalidated once the flush
1735 	 * operation is complete. This bit is only valid when the
1736 	 * Post-Sync Operation field is a value of 1h or 3h."
1737 	 */
1738 	if (invalidate & I915_GEM_GPU_DOMAINS)
1739 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1740 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1741 	intel_ring_emit(ring, cmd);
1742 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1743 	if (INTEL_INFO(ring->dev)->gen >= 8) {
1744 		intel_ring_emit(ring, 0); /* upper addr */
1745 		intel_ring_emit(ring, 0); /* value */
1746 	} else  {
1747 		intel_ring_emit(ring, 0);
1748 		intel_ring_emit(ring, MI_NOOP);
1749 	}
1750 	intel_ring_advance(ring);
1751 	return 0;
1752 }
1753 
1754 static int
1755 gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1756 			      u32 offset, u32 len,
1757 			      unsigned flags)
1758 {
1759 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1760 	bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL &&
1761 		!(flags & I915_DISPATCH_SECURE);
1762 	int ret;
1763 
1764 	ret = intel_ring_begin(ring, 4);
1765 	if (ret)
1766 		return ret;
1767 
1768 	/* FIXME(BDW): Address space and security selectors. */
1769 	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
1770 	intel_ring_emit(ring, offset);
1771 	intel_ring_emit(ring, 0);
1772 	intel_ring_emit(ring, MI_NOOP);
1773 	intel_ring_advance(ring);
1774 
1775 	return 0;
1776 }
1777 
1778 static int
1779 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1780 			      u32 offset, u32 len,
1781 			      unsigned flags)
1782 {
1783 	int ret;
1784 
1785 	ret = intel_ring_begin(ring, 2);
1786 	if (ret)
1787 		return ret;
1788 
1789 	intel_ring_emit(ring,
1790 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1791 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1792 	/* bit0-7 is the length on GEN6+ */
1793 	intel_ring_emit(ring, offset);
1794 	intel_ring_advance(ring);
1795 
1796 	return 0;
1797 }
1798 
1799 static int
1800 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1801 			      u32 offset, u32 len,
1802 			      unsigned flags)
1803 {
1804 	int ret;
1805 
1806 	ret = intel_ring_begin(ring, 2);
1807 	if (ret)
1808 		return ret;
1809 
1810 	intel_ring_emit(ring,
1811 			MI_BATCH_BUFFER_START |
1812 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1813 	/* bit0-7 is the length on GEN6+ */
1814 	intel_ring_emit(ring, offset);
1815 	intel_ring_advance(ring);
1816 
1817 	return 0;
1818 }
1819 
1820 /* Blitter support (SandyBridge+) */
1821 
1822 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1823 			   u32 invalidate, u32 flush)
1824 {
1825 	struct drm_device *dev = ring->dev;
1826 	uint32_t cmd;
1827 	int ret;
1828 
1829 	ret = intel_ring_begin(ring, 4);
1830 	if (ret)
1831 		return ret;
1832 
1833 	cmd = MI_FLUSH_DW;
1834 	if (INTEL_INFO(ring->dev)->gen >= 8)
1835 		cmd += 1;
1836 	/*
1837 	 * Bspec vol 1c.3 - blitter engine command streamer:
1838 	 * "If ENABLED, all TLBs will be invalidated once the flush
1839 	 * operation is complete. This bit is only valid when the
1840 	 * Post-Sync Operation field is a value of 1h or 3h."
1841 	 */
1842 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1843 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1844 			MI_FLUSH_DW_OP_STOREDW;
1845 	intel_ring_emit(ring, cmd);
1846 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1847 	if (INTEL_INFO(ring->dev)->gen >= 8) {
1848 		intel_ring_emit(ring, 0); /* upper addr */
1849 		intel_ring_emit(ring, 0); /* value */
1850 	} else  {
1851 		intel_ring_emit(ring, 0);
1852 		intel_ring_emit(ring, MI_NOOP);
1853 	}
1854 	intel_ring_advance(ring);
1855 
1856 	if (IS_GEN7(dev) && !invalidate && flush)
1857 		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
1858 
1859 	return 0;
1860 }
1861 
1862 int intel_init_render_ring_buffer(struct drm_device *dev)
1863 {
1864 	drm_i915_private_t *dev_priv = dev->dev_private;
1865 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1866 
1867 	ring->name = "render ring";
1868 	ring->id = RCS;
1869 	ring->mmio_base = RENDER_RING_BASE;
1870 
1871 	if (INTEL_INFO(dev)->gen >= 6) {
1872 		ring->add_request = gen6_add_request;
1873 		ring->flush = gen7_render_ring_flush;
1874 		if (INTEL_INFO(dev)->gen == 6)
1875 			ring->flush = gen6_render_ring_flush;
1876 		if (INTEL_INFO(dev)->gen >= 8) {
1877 			ring->flush = gen8_render_ring_flush;
1878 			ring->irq_get = gen8_ring_get_irq;
1879 			ring->irq_put = gen8_ring_put_irq;
1880 		} else {
1881 			ring->irq_get = gen6_ring_get_irq;
1882 			ring->irq_put = gen6_ring_put_irq;
1883 		}
1884 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
1885 		ring->get_seqno = gen6_ring_get_seqno;
1886 		ring->set_seqno = ring_set_seqno;
1887 		ring->sync_to = gen6_ring_sync;
1888 		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_INVALID;
1889 		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_RV;
1890 		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_RB;
1891 		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_RVE;
1892 		ring->signal_mbox[RCS] = GEN6_NOSYNC;
1893 		ring->signal_mbox[VCS] = GEN6_VRSYNC;
1894 		ring->signal_mbox[BCS] = GEN6_BRSYNC;
1895 		ring->signal_mbox[VECS] = GEN6_VERSYNC;
1896 	} else if (IS_GEN5(dev)) {
1897 		ring->add_request = pc_render_add_request;
1898 		ring->flush = gen4_render_ring_flush;
1899 		ring->get_seqno = pc_render_get_seqno;
1900 		ring->set_seqno = pc_render_set_seqno;
1901 		ring->irq_get = gen5_ring_get_irq;
1902 		ring->irq_put = gen5_ring_put_irq;
1903 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
1904 					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
1905 	} else {
1906 		ring->add_request = i9xx_add_request;
1907 		if (INTEL_INFO(dev)->gen < 4)
1908 			ring->flush = gen2_render_ring_flush;
1909 		else
1910 			ring->flush = gen4_render_ring_flush;
1911 		ring->get_seqno = ring_get_seqno;
1912 		ring->set_seqno = ring_set_seqno;
1913 		if (IS_GEN2(dev)) {
1914 			ring->irq_get = i8xx_ring_get_irq;
1915 			ring->irq_put = i8xx_ring_put_irq;
1916 		} else {
1917 			ring->irq_get = i9xx_ring_get_irq;
1918 			ring->irq_put = i9xx_ring_put_irq;
1919 		}
1920 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1921 	}
1922 	ring->write_tail = ring_write_tail;
1923 	if (IS_HASWELL(dev))
1924 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1925 	else if (IS_GEN8(dev))
1926 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
1927 	else if (INTEL_INFO(dev)->gen >= 6)
1928 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1929 	else if (INTEL_INFO(dev)->gen >= 4)
1930 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1931 	else if (IS_I830(dev) || IS_845G(dev))
1932 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1933 	else
1934 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1935 	ring->init = init_render_ring;
1936 	ring->cleanup = render_ring_cleanup;
1937 
1938 	/* Workaround batchbuffer to combat CS tlb bug. */
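	/*
	 * On 830/845 the command streamer can prefetch through stale TLB
	 * entries; the i830 dispatch path copies user batches into this
	 * pinned scratch object (up to I830_BATCH_LIMIT bytes) and executes
	 * them from there.
	 */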
1939 	if (HAS_BROKEN_CS_TLB(dev)) {
1940 		struct drm_i915_gem_object *obj;
1941 		int ret;
1942 
1943 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1944 		if (obj == NULL) {
1945 			DRM_ERROR("Failed to allocate batch bo\n");
1946 			return -ENOMEM;
1947 		}
1948 
1949 		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
1950 		if (ret != 0) {
1951 			drm_gem_object_unreference(&obj->base);
1952 			DRM_ERROR("Failed to pin batch bo\n");
1953 			return ret;
1954 		}
1955 
1956 		ring->scratch.obj = obj;
1957 		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
1958 	}
1959 
1960 	return intel_init_ring_buffer(dev, ring);
1961 }
1962 
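/*
 * Legacy (non-KMS/UMS) render ring setup: the ring memory is provided by the
 * caller rather than by GEM, so only the legacy i8xx/i9xx vfuncs are wired up
 * here.
 */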
1963 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1964 {
1965 	drm_i915_private_t *dev_priv = dev->dev_private;
1966 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1967 	int ret;
1968 
1969 	ring->name = "render ring";
1970 	ring->id = RCS;
1971 	ring->mmio_base = RENDER_RING_BASE;
1972 
1973 	if (INTEL_INFO(dev)->gen >= 6) {
1974 		/* non-kms not supported on gen6+ */
1975 		return -ENODEV;
1976 	}
1977 
1978 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1979 	 * gem_init ioctl returns -ENODEV). Hence we do not need to set up
1980 	 * the special gen5 functions. */
1981 	ring->add_request = i9xx_add_request;
1982 	if (INTEL_INFO(dev)->gen < 4)
1983 		ring->flush = gen2_render_ring_flush;
1984 	else
1985 		ring->flush = gen4_render_ring_flush;
1986 	ring->get_seqno = ring_get_seqno;
1987 	ring->set_seqno = ring_set_seqno;
1988 	if (IS_GEN2(dev)) {
1989 		ring->irq_get = i8xx_ring_get_irq;
1990 		ring->irq_put = i8xx_ring_put_irq;
1991 	} else {
1992 		ring->irq_get = i9xx_ring_get_irq;
1993 		ring->irq_put = i9xx_ring_put_irq;
1994 	}
1995 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1996 	ring->write_tail = ring_write_tail;
1997 	if (INTEL_INFO(dev)->gen >= 4)
1998 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1999 	else if (IS_I830(dev) || IS_845G(dev))
2000 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2001 	else
2002 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2003 	ring->init = init_render_ring;
2004 	ring->cleanup = render_ring_cleanup;
2005 
2006 	ring->dev = dev;
2007 	INIT_LIST_HEAD(&ring->active_list);
2008 	INIT_LIST_HEAD(&ring->request_list);
2009 
2010 	ring->size = size;
2011 	ring->effective_size = ring->size;
2012 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
2013 		ring->effective_size -= 128;
2014 
2015 	ring->virtual_start = ioremap_wc(start, size);
2016 	if (ring->virtual_start == NULL) {
2017 		DRM_ERROR("cannot ioremap virtual address for"
2018 			  " ring buffer\n");
2019 		return -ENOMEM;
2020 	}
2021 
2022 	if (!I915_NEED_GFX_HWS(dev)) {
2023 		ret = init_phys_status_page(ring);
2024 		if (ret)
2025 			return ret;
2026 	}
2027 
2028 	return 0;
2029 }
2030 
2031 int intel_init_bsd_ring_buffer(struct drm_device *dev)
2032 {
2033 	drm_i915_private_t *dev_priv = dev->dev_private;
2034 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
2035 
2036 	ring->name = "bsd ring";
2037 	ring->id = VCS;
2038 
2039 	ring->write_tail = ring_write_tail;
2040 	if (INTEL_INFO(dev)->gen >= 6) {
2041 		ring->mmio_base = GEN6_BSD_RING_BASE;
2042 		/* gen6 bsd needs a special wa for tail updates */
2043 		/* gen6 bsd needs a special workaround for tail updates */
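		/*
		 * gen6_bsd_ring_write_tail (earlier in this file) disables the
		 * ring's IDLE messaging so the GT wakes it from RC6, waits for
		 * the ring to wake, writes the tail, then re-enables the
		 * messaging.
		 */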
2044 			ring->write_tail = gen6_bsd_ring_write_tail;
2045 		ring->flush = gen6_bsd_ring_flush;
2046 		ring->add_request = gen6_add_request;
2047 		ring->get_seqno = gen6_ring_get_seqno;
2048 		ring->set_seqno = ring_set_seqno;
2049 		if (INTEL_INFO(dev)->gen >= 8) {
2050 			ring->irq_enable_mask =
2051 				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2052 			ring->irq_get = gen8_ring_get_irq;
2053 			ring->irq_put = gen8_ring_put_irq;
2054 			ring->dispatch_execbuffer =
2055 				gen8_ring_dispatch_execbuffer;
2056 		} else {
2057 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2058 			ring->irq_get = gen6_ring_get_irq;
2059 			ring->irq_put = gen6_ring_put_irq;
2060 			ring->dispatch_execbuffer =
2061 				gen6_ring_dispatch_execbuffer;
2062 		}
2063 		ring->sync_to = gen6_ring_sync;
2064 		ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR;
2065 		ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2066 		ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VB;
2067 		ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_VVE;
2068 		ring->signal_mbox[RCS] = GEN6_RVSYNC;
2069 		ring->signal_mbox[VCS] = GEN6_NOSYNC;
2070 		ring->signal_mbox[BCS] = GEN6_BVSYNC;
2071 		ring->signal_mbox[VECS] = GEN6_VEVSYNC;
2072 	} else {
2073 		ring->mmio_base = BSD_RING_BASE;
2074 		ring->flush = bsd_ring_flush;
2075 		ring->add_request = i9xx_add_request;
2076 		ring->get_seqno = ring_get_seqno;
2077 		ring->set_seqno = ring_set_seqno;
2078 		if (IS_GEN5(dev)) {
2079 			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2080 			ring->irq_get = gen5_ring_get_irq;
2081 			ring->irq_put = gen5_ring_put_irq;
2082 		} else {
2083 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2084 			ring->irq_get = i9xx_ring_get_irq;
2085 			ring->irq_put = i9xx_ring_put_irq;
2086 		}
2087 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2088 	}
2089 	ring->init = init_ring_common;
2090 
2091 	return intel_init_ring_buffer(dev, ring);
2092 }
2093 
2094 int intel_init_blt_ring_buffer(struct drm_device *dev)
2095 {
2096 	drm_i915_private_t *dev_priv = dev->dev_private;
2097 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
2098 
2099 	ring->name = "blitter ring";
2100 	ring->id = BCS;
2101 
2102 	ring->mmio_base = BLT_RING_BASE;
2103 	ring->write_tail = ring_write_tail;
2104 	ring->flush = gen6_ring_flush;
2105 	ring->add_request = gen6_add_request;
2106 	ring->get_seqno = gen6_ring_get_seqno;
2107 	ring->set_seqno = ring_set_seqno;
2108 	if (INTEL_INFO(dev)->gen >= 8) {
2109 		ring->irq_enable_mask =
2110 			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2111 		ring->irq_get = gen8_ring_get_irq;
2112 		ring->irq_put = gen8_ring_put_irq;
2113 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2114 	} else {
2115 		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2116 		ring->irq_get = gen6_ring_get_irq;
2117 		ring->irq_put = gen6_ring_put_irq;
2118 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2119 	}
2120 	ring->sync_to = gen6_ring_sync;
2121 	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR;
2122 	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV;
2123 	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_INVALID;
2124 	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_BVE;
2125 	ring->signal_mbox[RCS] = GEN6_RBSYNC;
2126 	ring->signal_mbox[VCS] = GEN6_VBSYNC;
2127 	ring->signal_mbox[BCS] = GEN6_NOSYNC;
2128 	ring->signal_mbox[VECS] = GEN6_VEBSYNC;
2129 	ring->init = init_ring_common;
2130 
2131 	return intel_init_ring_buffer(dev, ring);
2132 }
2133 
2134 int intel_init_vebox_ring_buffer(struct drm_device *dev)
2135 {
2136 	drm_i915_private_t *dev_priv = dev->dev_private;
2137 	struct intel_ring_buffer *ring = &dev_priv->ring[VECS];
2138 
2139 	ring->name = "video enhancement ring";
2140 	ring->id = VECS;
2141 
2142 	ring->mmio_base = VEBOX_RING_BASE;
2143 	ring->write_tail = ring_write_tail;
2144 	ring->flush = gen6_ring_flush;
2145 	ring->add_request = gen6_add_request;
2146 	ring->get_seqno = gen6_ring_get_seqno;
2147 	ring->set_seqno = ring_set_seqno;
2148 
2149 	if (INTEL_INFO(dev)->gen >= 8) {
2150 		ring->irq_enable_mask =
2151 			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2152 		ring->irq_get = gen8_ring_get_irq;
2153 		ring->irq_put = gen8_ring_put_irq;
2154 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2155 	} else {
2156 		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2157 		ring->irq_get = hsw_vebox_get_irq;
2158 		ring->irq_put = hsw_vebox_put_irq;
2159 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2160 	}
2161 	ring->sync_to = gen6_ring_sync;
2162 	ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER;
2163 	ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV;
2164 	ring->semaphore_register[BCS] = MI_SEMAPHORE_SYNC_VEB;
2165 	ring->semaphore_register[VECS] = MI_SEMAPHORE_SYNC_INVALID;
2166 	ring->signal_mbox[RCS] = GEN6_RVESYNC;
2167 	ring->signal_mbox[VCS] = GEN6_VVESYNC;
2168 	ring->signal_mbox[BCS] = GEN6_BVESYNC;
2169 	ring->signal_mbox[VECS] = GEN6_NOSYNC;
2170 	ring->init = init_ring_common;
2171 
2172 	return intel_init_ring_buffer(dev, ring);
2173 }
2174 
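/*
 * Flush any rendering the GPU still has pending in its write caches; this is
 * typically called when a request is added after the ring has been marked
 * gpu_caches_dirty by a batch dispatch.
 */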
2175 int
2176 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
2177 {
2178 	int ret;
2179 
2180 	if (!ring->gpu_caches_dirty)
2181 		return 0;
2182 
2183 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
2184 	if (ret)
2185 		return ret;
2186 
2187 	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
2188 
2189 	ring->gpu_caches_dirty = false;
2190 	return 0;
2191 }
2192 
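/*
 * Invalidate the GPU read caches (flushing writes first if the ring is still
 * dirty) so a new batch observes up-to-date buffer contents; typically called
 * just before dispatching a batchbuffer.
 */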
2193 int
2194 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
2195 {
2196 	uint32_t flush_domains;
2197 	int ret;
2198 
2199 	flush_domains = 0;
2200 	if (ring->gpu_caches_dirty)
2201 		flush_domains = I915_GEM_GPU_DOMAINS;
2202 
2203 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
2204 	if (ret)
2205 		return ret;
2206 
2207 	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
2208 
2209 	ring->gpu_caches_dirty = false;
2210 	return 0;
2211 }
2212