xref: /dflybsd-src/sys/dev/drm/i915/intel_ringbuffer.c (revision 475c7069e94570a897d1467613efd2b3f0212ff9)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <linux/log2.h>
31 #include <drm/drmP.h>
32 #include "i915_drv.h"
33 #include <drm/i915_drm.h>
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36 
37 int __intel_ring_space(int head, int tail, int size)
38 {
39 	int space = head - tail;
40 	if (space <= 0)
41 		space += size;
42 	return space - I915_RING_FREE_SPACE;
43 }
44 
45 void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
46 {
47 	if (ringbuf->last_retired_head != -1) {
48 		ringbuf->head = ringbuf->last_retired_head;
49 		ringbuf->last_retired_head = -1;
50 	}
51 
52 	ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
53 					    ringbuf->tail, ringbuf->size);
54 }
55 
56 int intel_ring_space(struct intel_ringbuffer *ringbuf)
57 {
58 	intel_ring_update_space(ringbuf);
59 	return ringbuf->space;
60 }
61 
62 bool intel_ring_stopped(struct intel_engine_cs *ring)
63 {
64 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
65 	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
66 }
67 
68 static void __intel_ring_advance(struct intel_engine_cs *ring)
69 {
70 	struct intel_ringbuffer *ringbuf = ring->buffer;
71 	ringbuf->tail &= ringbuf->size - 1;
72 	if (intel_ring_stopped(ring))
73 		return;
74 	ring->write_tail(ring, ringbuf->tail);
75 }
76 
77 static int
78 gen2_render_ring_flush(struct drm_i915_gem_request *req,
79 		       u32	invalidate_domains,
80 		       u32	flush_domains)
81 {
82 	struct intel_engine_cs *ring = req->ring;
83 	u32 cmd;
84 	int ret;
85 
86 	cmd = MI_FLUSH;
87 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
88 		cmd |= MI_NO_WRITE_FLUSH;
89 
90 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
91 		cmd |= MI_READ_FLUSH;
92 
93 	ret = intel_ring_begin(req, 2);
94 	if (ret)
95 		return ret;
96 
97 	intel_ring_emit(ring, cmd);
98 	intel_ring_emit(ring, MI_NOOP);
99 	intel_ring_advance(ring);
100 
101 	return 0;
102 }
103 
104 static int
105 gen4_render_ring_flush(struct drm_i915_gem_request *req,
106 		       u32	invalidate_domains,
107 		       u32	flush_domains)
108 {
109 	struct intel_engine_cs *ring = req->ring;
110 	struct drm_device *dev = ring->dev;
111 	u32 cmd;
112 	int ret;
113 
114 	/*
115 	 * read/write caches:
116 	 *
117 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
118 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
119 	 * also flushed at 2d versus 3d pipeline switches.
120 	 *
121 	 * read-only caches:
122 	 *
123 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
124 	 * MI_READ_FLUSH is set, and is always flushed on 965.
125 	 *
126 	 * I915_GEM_DOMAIN_COMMAND may not exist?
127 	 *
128 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
129 	 * invalidated when MI_EXE_FLUSH is set.
130 	 *
131 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
132 	 * invalidated with every MI_FLUSH.
133 	 *
134 	 * TLBs:
135 	 *
136 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
137 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
138 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
139 	 * are flushed at any MI_FLUSH.
140 	 */
141 
142 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
143 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
144 		cmd &= ~MI_NO_WRITE_FLUSH;
145 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
146 		cmd |= MI_EXE_FLUSH;
147 
148 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
149 	    (IS_G4X(dev) || IS_GEN5(dev)))
150 		cmd |= MI_INVALIDATE_ISP;
151 
152 	ret = intel_ring_begin(req, 2);
153 	if (ret)
154 		return ret;
155 
156 	intel_ring_emit(ring, cmd);
157 	intel_ring_emit(ring, MI_NOOP);
158 	intel_ring_advance(ring);
159 
160 	return 0;
161 }
162 
163 /**
164  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
165  * implementing two workarounds on gen6.  From section 1.4.7.1
166  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
167  *
168  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
169  * produced by non-pipelined state commands), software needs to first
170  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
171  * 0.
172  *
173  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
174  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
175  *
176  * And the workaround for these two requires this workaround first:
177  *
178  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
179  * BEFORE the pipe-control with a post-sync op and no write-cache
180  * flushes.
181  *
182  * And this last workaround is tricky because of the requirements on
183  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
184  * volume 2 part 1:
185  *
186  *     "1 of the following must also be set:
187  *      - Render Target Cache Flush Enable ([12] of DW1)
188  *      - Depth Cache Flush Enable ([0] of DW1)
189  *      - Stall at Pixel Scoreboard ([1] of DW1)
190  *      - Depth Stall ([13] of DW1)
191  *      - Post-Sync Operation ([13] of DW1)
192  *      - Notify Enable ([8] of DW1)"
193  *
194  * The cache flushes require the workaround flush that triggered this
195  * one, so we can't use it.  Depth stall would trigger the same.
196  * Post-sync nonzero is what triggered this second workaround, so we
197  * can't use that one either.  Notify enable is IRQs, which aren't
198  * really our business.  That leaves only stall at scoreboard.
199  */
200 static int
201 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
202 {
203 	struct intel_engine_cs *ring = req->ring;
204 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
205 	int ret;
206 
207 	ret = intel_ring_begin(req, 6);
208 	if (ret)
209 		return ret;
210 
211 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
212 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
213 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
214 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
215 	intel_ring_emit(ring, 0); /* low dword */
216 	intel_ring_emit(ring, 0); /* high dword */
217 	intel_ring_emit(ring, MI_NOOP);
218 	intel_ring_advance(ring);
219 
220 	ret = intel_ring_begin(req, 6);
221 	if (ret)
222 		return ret;
223 
224 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
225 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
226 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
227 	intel_ring_emit(ring, 0);
228 	intel_ring_emit(ring, 0);
229 	intel_ring_emit(ring, MI_NOOP);
230 	intel_ring_advance(ring);
231 
232 	return 0;
233 }
234 
235 static int
236 gen6_render_ring_flush(struct drm_i915_gem_request *req,
237 		       u32 invalidate_domains, u32 flush_domains)
238 {
239 	struct intel_engine_cs *ring = req->ring;
240 	u32 flags = 0;
241 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
242 	int ret;
243 
244 	/* Force SNB workarounds for PIPE_CONTROL flushes */
245 	ret = intel_emit_post_sync_nonzero_flush(req);
246 	if (ret)
247 		return ret;
248 
249 	/* Just flush everything.  Experiments have shown that reducing the
250 	 * number of bits based on the write domains has little performance
251 	 * impact.
252 	 */
253 	if (flush_domains) {
254 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
255 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
256 		/*
257 		 * Ensure that any following seqno writes only happen
258 		 * when the render cache is indeed flushed.
259 		 */
260 		flags |= PIPE_CONTROL_CS_STALL;
261 	}
262 	if (invalidate_domains) {
263 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
264 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
265 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
266 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
267 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
268 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
269 		/*
270 		 * TLB invalidate requires a post-sync write.
271 		 */
272 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
273 	}
274 
275 	ret = intel_ring_begin(req, 4);
276 	if (ret)
277 		return ret;
278 
279 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
280 	intel_ring_emit(ring, flags);
281 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
282 	intel_ring_emit(ring, 0);
283 	intel_ring_advance(ring);
284 
285 	return 0;
286 }
287 
288 static int
289 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
290 {
291 	struct intel_engine_cs *ring = req->ring;
292 	int ret;
293 
294 	ret = intel_ring_begin(req, 4);
295 	if (ret)
296 		return ret;
297 
298 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
299 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
300 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
301 	intel_ring_emit(ring, 0);
302 	intel_ring_emit(ring, 0);
303 	intel_ring_advance(ring);
304 
305 	return 0;
306 }
307 
308 static int
309 gen7_render_ring_flush(struct drm_i915_gem_request *req,
310 		       u32 invalidate_domains, u32 flush_domains)
311 {
312 	struct intel_engine_cs *ring = req->ring;
313 	u32 flags = 0;
314 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
315 	int ret;
316 
317 	/*
318 	 * Ensure that any following seqno writes only happen when the render
319 	 * cache is indeed flushed.
320 	 *
321 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
322 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
323 	 * don't try to be clever and just set it unconditionally.
324 	 */
325 	flags |= PIPE_CONTROL_CS_STALL;
326 
327 	/* Just flush everything.  Experiments have shown that reducing the
328 	 * number of bits based on the write domains has little performance
329 	 * impact.
330 	 */
331 	if (flush_domains) {
332 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
333 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
334 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
335 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
336 	}
337 	if (invalidate_domains) {
338 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
339 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
340 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
341 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
342 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
343 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
344 		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
345 		/*
346 		 * TLB invalidate requires a post-sync write.
347 		 */
348 		flags |= PIPE_CONTROL_QW_WRITE;
349 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
350 
351 		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
352 
353 		/* Workaround: we must issue a pipe_control with CS-stall bit
354 		 * set before a pipe_control command that has the state cache
355 		 * invalidate bit set. */
356 		gen7_render_ring_cs_stall_wa(req);
357 	}
358 
359 	ret = intel_ring_begin(req, 4);
360 	if (ret)
361 		return ret;
362 
363 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
364 	intel_ring_emit(ring, flags);
365 	intel_ring_emit(ring, scratch_addr);
366 	intel_ring_emit(ring, 0);
367 	intel_ring_advance(ring);
368 
369 	return 0;
370 }
371 
372 static int
373 gen8_emit_pipe_control(struct drm_i915_gem_request *req,
374 		       u32 flags, u32 scratch_addr)
375 {
376 	struct intel_engine_cs *ring = req->ring;
377 	int ret;
378 
379 	ret = intel_ring_begin(req, 6);
380 	if (ret)
381 		return ret;
382 
383 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
384 	intel_ring_emit(ring, flags);
385 	intel_ring_emit(ring, scratch_addr);
386 	intel_ring_emit(ring, 0);
387 	intel_ring_emit(ring, 0);
388 	intel_ring_emit(ring, 0);
389 	intel_ring_advance(ring);
390 
391 	return 0;
392 }
393 
394 static int
395 gen8_render_ring_flush(struct drm_i915_gem_request *req,
396 		       u32 invalidate_domains, u32 flush_domains)
397 {
398 	u32 flags = 0;
399 	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
400 	int ret;
401 
402 	flags |= PIPE_CONTROL_CS_STALL;
403 
404 	if (flush_domains) {
405 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
406 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
407 		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
408 		flags |= PIPE_CONTROL_FLUSH_ENABLE;
409 	}
410 	if (invalidate_domains) {
411 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
412 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
413 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
414 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
415 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
416 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
417 		flags |= PIPE_CONTROL_QW_WRITE;
418 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
419 
420 		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
421 		ret = gen8_emit_pipe_control(req,
422 					     PIPE_CONTROL_CS_STALL |
423 					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
424 					     0);
425 		if (ret)
426 			return ret;
427 	}
428 
429 	return gen8_emit_pipe_control(req, flags, scratch_addr);
430 }
431 
432 static void ring_write_tail(struct intel_engine_cs *ring,
433 			    u32 value)
434 {
435 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
436 	I915_WRITE_TAIL(ring, value);
437 }
438 
439 u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
440 {
441 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
442 	u64 acthd;
443 
444 	if (INTEL_INFO(ring->dev)->gen >= 8)
445 		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
446 					 RING_ACTHD_UDW(ring->mmio_base));
447 	else if (INTEL_INFO(ring->dev)->gen >= 4)
448 		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
449 	else
450 		acthd = I915_READ(ACTHD);
451 
452 	return acthd;
453 }
454 
455 static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
456 {
457 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
458 	u32 addr;
459 
460 	addr = dev_priv->status_page_dmah->busaddr;
461 	if (INTEL_INFO(ring->dev)->gen >= 4)
462 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
463 	I915_WRITE(HWS_PGA, addr);
464 }
465 
466 static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
467 {
468 	struct drm_device *dev = ring->dev;
469 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
470 	i915_reg_t mmio;
471 
472 	/* The ring status page addresses are no longer next to the rest of
473 	 * the ring registers as of gen7.
474 	 */
475 	if (IS_GEN7(dev)) {
476 		switch (ring->id) {
477 		case RCS:
478 			mmio = RENDER_HWS_PGA_GEN7;
479 			break;
480 		case BCS:
481 			mmio = BLT_HWS_PGA_GEN7;
482 			break;
483 		/*
484 		 * VCS2 actually doesn't exist on Gen7. Only shut up
485 		 * gcc switch check warning
486 		 */
487 		case VCS2:
488 		case VCS:
489 			mmio = BSD_HWS_PGA_GEN7;
490 			break;
491 		case VECS:
492 			mmio = VEBOX_HWS_PGA_GEN7;
493 			break;
494 		}
495 	} else if (IS_GEN6(ring->dev)) {
496 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
497 	} else {
498 		/* XXX: gen8 returns to sanity */
499 		mmio = RING_HWS_PGA(ring->mmio_base);
500 	}
501 
502 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
503 	POSTING_READ(mmio);
504 
505 	/*
506 	 * Flush the TLB for this page
507 	 *
508 	 * FIXME: These two bits have disappeared on gen8, so a question
509 	 * arises: do we still need this and if so how should we go about
510 	 * invalidating the TLB?
511 	 */
512 	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
513 		i915_reg_t reg = RING_INSTPM(ring->mmio_base);
514 
515 		/* ring should be idle before issuing a sync flush*/
516 		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
517 
518 		I915_WRITE(reg,
519 			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
520 					      INSTPM_SYNC_FLUSH));
521 		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
522 			     1000))
523 			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
524 				  ring->name);
525 	}
526 }
527 
528 static bool stop_ring(struct intel_engine_cs *ring)
529 {
530 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
531 
532 	if (!IS_GEN2(ring->dev)) {
533 		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
534 		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
535 			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
536 			/* Sometimes we observe that the idle flag is not
537 			 * set even though the ring is empty. So double
538 			 * check before giving up.
539 			 */
540 			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
541 				return false;
542 		}
543 	}
544 
545 	I915_WRITE_CTL(ring, 0);
546 	I915_WRITE_HEAD(ring, 0);
547 	ring->write_tail(ring, 0);
548 
549 	if (!IS_GEN2(ring->dev)) {
550 		(void)I915_READ_CTL(ring);
551 		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
552 	}
553 
554 	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
555 }
556 
557 static int init_ring_common(struct intel_engine_cs *ring)
558 {
559 	struct drm_device *dev = ring->dev;
560 	struct drm_i915_private *dev_priv = dev->dev_private;
561 	struct intel_ringbuffer *ringbuf = ring->buffer;
562 	struct drm_i915_gem_object *obj = ringbuf->obj;
563 	int ret = 0;
564 
565 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
566 
567 	if (!stop_ring(ring)) {
568 		/* G45 ring initialization often fails to reset head to zero */
569 		DRM_DEBUG_KMS("%s head not reset to zero "
570 			      "ctl %08x head %08x tail %08x start %08x\n",
571 			      ring->name,
572 			      I915_READ_CTL(ring),
573 			      I915_READ_HEAD(ring),
574 			      I915_READ_TAIL(ring),
575 			      I915_READ_START(ring));
576 
577 		if (!stop_ring(ring)) {
578 			DRM_ERROR("failed to set %s head to zero "
579 				  "ctl %08x head %08x tail %08x start %08x\n",
580 				  ring->name,
581 				  I915_READ_CTL(ring),
582 				  I915_READ_HEAD(ring),
583 				  I915_READ_TAIL(ring),
584 				  I915_READ_START(ring));
585 			ret = -EIO;
586 			goto out;
587 		}
588 	}
589 
590 	if (I915_NEED_GFX_HWS(dev))
591 		intel_ring_setup_status_page(ring);
592 	else
593 		ring_setup_phys_status_page(ring);
594 
595 	/* Enforce ordering by reading HEAD register back */
596 	I915_READ_HEAD(ring);
597 
598 	/* Initialize the ring. This must happen _after_ we've cleared the ring
599 	 * registers with the above sequence (the readback of the HEAD registers
600 	 * also enforces ordering), otherwise the hw might lose the new ring
601 	 * register values. */
602 	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
603 
604 	/* WaClearRingBufHeadRegAtInit:ctg,elk */
605 	if (I915_READ_HEAD(ring))
606 		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
607 			  ring->name, I915_READ_HEAD(ring));
608 	I915_WRITE_HEAD(ring, 0);
609 	(void)I915_READ_HEAD(ring);
610 
611 	I915_WRITE_CTL(ring,
612 			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
613 			| RING_VALID);
614 
615 	/* If the head is still not zero, the ring is dead */
616 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
617 		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
618 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
619 		DRM_ERROR("%s initialization failed "
620 			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
621 			  ring->name,
622 			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
623 			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
624 			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
625 		ret = -EIO;
626 		goto out;
627 	}
628 
629 	ringbuf->last_retired_head = -1;
630 	ringbuf->head = I915_READ_HEAD(ring);
631 	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
632 	intel_ring_update_space(ringbuf);
633 
634 	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
635 
636 out:
637 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
638 
639 	return ret;
640 }
641 
642 void
643 intel_fini_pipe_control(struct intel_engine_cs *ring)
644 {
645 	struct drm_device *dev = ring->dev;
646 
647 	if (ring->scratch.obj == NULL)
648 		return;
649 
650 	if (INTEL_INFO(dev)->gen >= 5) {
651 		kunmap(sg_page(ring->scratch.obj->pages->sgl));
652 		i915_gem_object_ggtt_unpin(ring->scratch.obj);
653 	}
654 
655 	drm_gem_object_unreference(&ring->scratch.obj->base);
656 	ring->scratch.obj = NULL;
657 }
658 
659 int
660 intel_init_pipe_control(struct intel_engine_cs *ring)
661 {
662 	int ret;
663 
664 	WARN_ON(ring->scratch.obj);
665 
666 	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
667 	if (ring->scratch.obj == NULL) {
668 		DRM_ERROR("Failed to allocate seqno page\n");
669 		ret = -ENOMEM;
670 		goto err;
671 	}
672 
673 	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
674 	if (ret)
675 		goto err_unref;
676 
677 	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
678 	if (ret)
679 		goto err_unref;
680 
681 	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
682 	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
683 	if (ring->scratch.cpu_page == NULL) {
684 		ret = -ENOMEM;
685 		goto err_unpin;
686 	}
687 
688 	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
689 			 ring->name, ring->scratch.gtt_offset);
690 	return 0;
691 
692 err_unpin:
693 	i915_gem_object_ggtt_unpin(ring->scratch.obj);
694 err_unref:
695 	drm_gem_object_unreference(&ring->scratch.obj->base);
696 err:
697 	return ret;
698 }
699 
700 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
701 {
702 	int ret, i;
703 	struct intel_engine_cs *ring = req->ring;
704 	struct drm_device *dev = ring->dev;
705 	struct drm_i915_private *dev_priv = dev->dev_private;
706 	struct i915_workarounds *w = &dev_priv->workarounds;
707 
708 	if (w->count == 0)
709 		return 0;
710 
711 	ring->gpu_caches_dirty = true;
712 	ret = intel_ring_flush_all_caches(req);
713 	if (ret)
714 		return ret;
715 
716 	ret = intel_ring_begin(req, (w->count * 2 + 2));
717 	if (ret)
718 		return ret;
719 
720 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
721 	for (i = 0; i < w->count; i++) {
722 		intel_ring_emit_reg(ring, w->reg[i].addr);
723 		intel_ring_emit(ring, w->reg[i].value);
724 	}
725 	intel_ring_emit(ring, MI_NOOP);
726 
727 	intel_ring_advance(ring);
728 
729 	ring->gpu_caches_dirty = true;
730 	ret = intel_ring_flush_all_caches(req);
731 	if (ret)
732 		return ret;
733 
734 	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
735 
736 	return 0;
737 }
738 
/*
 * Context init for the render ring: emit the workaround registers, then
 * the render state.  Returns the first non-zero result, or 0.
 */
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int ret = intel_ring_workarounds_emit(req);

	if (ret)
		return ret;

	return i915_gem_render_state_init(req);
}
753 
754 static int wa_add(struct drm_i915_private *dev_priv,
755 		  i915_reg_t addr,
756 		  const u32 mask, const u32 val)
757 {
758 	const u32 idx = dev_priv->workarounds.count;
759 
760 	if (WARN_ON(idx >= I915_MAX_WA_REGS))
761 		return -ENOSPC;
762 
763 	dev_priv->workarounds.reg[idx].addr = addr;
764 	dev_priv->workarounds.reg[idx].value = val;
765 	dev_priv->workarounds.reg[idx].mask = mask;
766 
767 	dev_priv->workarounds.count++;
768 
769 	return 0;
770 }
771 
/*
 * Workaround-table helpers.
 *
 * Each macro records one entry via wa_add() using a variable named
 * dev_priv that must be in scope at the point of use.  If wa_add() fails,
 * the macro makes the *enclosing function* return that error code, so these
 * may only be used inside functions returning int.
 */
#define WA_REG(addr, mask, val) do { \
		const int wa_err = wa_add(dev_priv, (addr), (mask), (val)); \
		if (wa_err) \
			return wa_err; \
	} while (0)

/* Masked-register variants: the value is built with the _MASKED_* helpers. */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

/* Plain-register variants: the value is derived from the current contents. */
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

/* Record a full 32-bit value for the register. */
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
791 
792 static int wa_ring_whitelist_reg(struct intel_engine_cs *ring, i915_reg_t reg)
793 {
794 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
795 	struct i915_workarounds *wa = &dev_priv->workarounds;
796 	const uint32_t index = wa->hw_whitelist_count[ring->id];
797 
798 	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
799 		return -EINVAL;
800 
801 	WA_WRITE(RING_FORCE_TO_NONPRIV(ring->mmio_base, index),
802 		 i915_mmio_reg_offset(reg));
803 	wa->hw_whitelist_count[ring->id]++;
804 
805 	return 0;
806 }
807 
808 static int gen8_init_workarounds(struct intel_engine_cs *ring)
809 {
810 	struct drm_device *dev = ring->dev;
811 	struct drm_i915_private *dev_priv = dev->dev_private;
812 
813 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
814 
815 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
816 	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
817 
818 	/* WaDisablePartialInstShootdown:bdw,chv */
819 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
820 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
821 
822 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
823 	 * workaround for for a possible hang in the unlikely event a TLB
824 	 * invalidation occurs during a PSD flush.
825 	 */
826 	/* WaForceEnableNonCoherent:bdw,chv */
827 	/* WaHdcDisableFetchWhenMasked:bdw,chv */
828 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
829 			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
830 			  HDC_FORCE_NON_COHERENT);
831 
832 	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
833 	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
834 	 *  polygons in the same 8x4 pixel/sample area to be processed without
835 	 *  stalling waiting for the earlier ones to write to Hierarchical Z
836 	 *  buffer."
837 	 *
838 	 * This optimization is off by default for BDW and CHV; turn it on.
839 	 */
840 	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
841 
842 	/* Wa4x4STCOptimizationDisable:bdw,chv */
843 	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
844 
845 	/*
846 	 * BSpec recommends 8x4 when MSAA is used,
847 	 * however in practice 16x4 seems fastest.
848 	 *
849 	 * Note that PS/WM thread counts depend on the WIZ hashing
850 	 * disable bit, which we don't touch here, but it's good
851 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
852 	 */
853 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
854 			    GEN6_WIZ_HASHING_MASK,
855 			    GEN6_WIZ_HASHING_16x4);
856 
857 	return 0;
858 }
859 
860 static int bdw_init_workarounds(struct intel_engine_cs *ring)
861 {
862 	int ret;
863 	struct drm_device *dev = ring->dev;
864 	struct drm_i915_private *dev_priv = dev->dev_private;
865 
866 	ret = gen8_init_workarounds(ring);
867 	if (ret)
868 		return ret;
869 
870 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
871 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
872 
873 	/* WaDisableDopClockGating:bdw */
874 	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
875 			  DOP_CLOCK_GATING_DISABLE);
876 
877 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
878 			  GEN8_SAMPLER_POWER_BYPASS_DIS);
879 
880 	WA_SET_BIT_MASKED(HDC_CHICKEN0,
881 			  /* WaForceContextSaveRestoreNonCoherent:bdw */
882 			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
883 			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
884 			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
885 
886 	return 0;
887 }
888 
889 static int chv_init_workarounds(struct intel_engine_cs *ring)
890 {
891 	int ret;
892 	struct drm_device *dev = ring->dev;
893 	struct drm_i915_private *dev_priv = dev->dev_private;
894 
895 	ret = gen8_init_workarounds(ring);
896 	if (ret)
897 		return ret;
898 
899 	/* WaDisableThreadStallDopClockGating:chv */
900 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
901 
902 	/* Improve HiZ throughput on CHV. */
903 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
904 
905 	return 0;
906 }
907 
908 static int gen9_init_workarounds(struct intel_engine_cs *ring)
909 {
910 	struct drm_device *dev = ring->dev;
911 	struct drm_i915_private *dev_priv = dev->dev_private;
912 	uint32_t tmp;
913 	int ret;
914 
915 	/* WaEnableLbsSlaRetryTimerDecrement:skl */
916 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
917 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
918 
919 	/* WaDisableKillLogic:bxt,skl */
920 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
921 		   ECOCHK_DIS_TLB);
922 
923 	/* WaDisablePartialInstShootdown:skl,bxt */
924 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
925 			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
926 
927 	/* Syncing dependencies between camera and graphics:skl,bxt */
928 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
929 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
930 
931 	/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
932 	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
933 	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
934 		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
935 				  GEN9_DG_MIRROR_FIX_ENABLE);
936 
937 	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
938 	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
939 	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
940 		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
941 				  GEN9_RHWO_OPTIMIZATION_DISABLE);
942 		/*
943 		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
944 		 * but we do that in per ctx batchbuffer as there is an issue
945 		 * with this register not getting restored on ctx restore
946 		 */
947 	}
948 
949 	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
950 	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
951 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
952 				  GEN9_ENABLE_YV12_BUGFIX);
953 
954 	/* Wa4x4STCOptimizationDisable:skl,bxt */
955 	/* WaDisablePartialResolveInVc:skl,bxt */
956 	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
957 					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
958 
959 	/* WaCcsTlbPrefetchDisable:skl,bxt */
960 	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
961 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
962 
963 	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
964 	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
965 	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
966 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
967 				  PIXEL_MASK_CAMMING_DISABLE);
968 
969 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
970 	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
971 	if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) ||
972 	    IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
973 		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
974 	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
975 
976 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
977 	if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
978 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
979 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
980 
981 	/* WaDisableSTUnitPowerOptimization:skl,bxt */
982 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
983 
984 	/* WaOCLCoherentLineFlush:skl,bxt */
985 	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
986 				    GEN8_LQSC_FLUSH_COHERENT_LINES));
987 
988 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
989 	ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
990 	if (ret)
991 		return ret;
992 
993 	/* WaAllowUMDToModifyHDCChicken1:skl,bxt */
994 	ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1);
995 	if (ret)
996 		return ret;
997 
998 	return 0;
999 }
1000 
1001 static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
1002 {
1003 	struct drm_device *dev = ring->dev;
1004 	struct drm_i915_private *dev_priv = dev->dev_private;
1005 	u8 vals[3] = { 0, 0, 0 };
1006 	unsigned int i;
1007 
1008 	for (i = 0; i < 3; i++) {
1009 		u8 ss;
1010 
1011 		/*
1012 		 * Only consider slices where one, and only one, subslice has 7
1013 		 * EUs
1014 		 */
1015 		if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
1016 			continue;
1017 
1018 		/*
1019 		 * subslice_7eu[i] != 0 (because of the check above) and
1020 		 * ss_max == 4 (maximum number of subslices possible per slice)
1021 		 *
1022 		 * ->    0 <= ss <= 3;
1023 		 */
1024 		ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1025 		vals[i] = 3 - ss;
1026 	}
1027 
1028 	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1029 		return 0;
1030 
1031 	/* Tune IZ hashing. See intel_device_info_runtime_init() */
1032 	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1033 			    GEN9_IZ_HASHING_MASK(2) |
1034 			    GEN9_IZ_HASHING_MASK(1) |
1035 			    GEN9_IZ_HASHING_MASK(0),
1036 			    GEN9_IZ_HASHING(2, vals[2]) |
1037 			    GEN9_IZ_HASHING(1, vals[1]) |
1038 			    GEN9_IZ_HASHING(0, vals[0]));
1039 
1040 	return 0;
1041 }
1042 
1043 static int skl_init_workarounds(struct intel_engine_cs *ring)
1044 {
1045 	int ret;
1046 	struct drm_device *dev = ring->dev;
1047 	struct drm_i915_private *dev_priv = dev->dev_private;
1048 
1049 	ret = gen9_init_workarounds(ring);
1050 	if (ret)
1051 		return ret;
1052 
1053 	/*
1054 	 * Actual WA is to disable percontext preemption granularity control
1055 	 * until D0 which is the default case so this is equivalent to
1056 	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
1057 	 */
1058 	if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
1059 		I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1060 			   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1061 	}
1062 
1063 	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
1064 		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1065 		I915_WRITE(FF_SLICE_CS_CHICKEN2,
1066 			   _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1067 	}
1068 
1069 	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1070 	 * involving this register should also be added to WA batch as required.
1071 	 */
1072 	if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
1073 		/* WaDisableLSQCROPERFforOCL:skl */
1074 		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1075 			   GEN8_LQSC_RO_PERF_DIS);
1076 
1077 	/* WaEnableGapsTsvCreditFix:skl */
1078 	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
1079 		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1080 					   GEN9_GAPS_TSV_CREDIT_DISABLE));
1081 	}
1082 
1083 	/* WaDisablePowerCompilerClockGating:skl */
1084 	if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
1085 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
1086 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1087 
1088 	/* This is tied to WaForceContextSaveRestoreNonCoherent */
1089 	if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) {
1090 		/*
1091 		 *Use Force Non-Coherent whenever executing a 3D context. This
1092 		 * is a workaround for a possible hang in the unlikely event
1093 		 * a TLB invalidation occurs during a PSD flush.
1094 		 */
1095 		/* WaForceEnableNonCoherent:skl */
1096 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
1097 				  HDC_FORCE_NON_COHERENT);
1098 
1099 		/* WaDisableHDCInvalidation:skl */
1100 		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1101 			   BDW_DISABLE_HDC_INVALIDATION);
1102 	}
1103 
1104 	/* WaBarrierPerformanceFixDisable:skl */
1105 	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
1106 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
1107 				  HDC_FENCE_DEST_SLM_DISABLE |
1108 				  HDC_BARRIER_PERFORMANCE_DISABLE);
1109 
1110 	/* WaDisableSbeCacheDispatchPortSharing:skl */
1111 	if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
1112 		WA_SET_BIT_MASKED(
1113 			GEN7_HALF_SLICE_CHICKEN1,
1114 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1115 
1116 	/* WaDisableLSQCROPERFforOCL:skl */
1117 	ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
1118 	if (ret)
1119 		return ret;
1120 
1121 	return skl_tune_iz_hashing(ring);
1122 }
1123 
1124 static int bxt_init_workarounds(struct intel_engine_cs *ring)
1125 {
1126 	int ret;
1127 	struct drm_device *dev = ring->dev;
1128 	struct drm_i915_private *dev_priv = dev->dev_private;
1129 
1130 	ret = gen9_init_workarounds(ring);
1131 	if (ret)
1132 		return ret;
1133 
1134 	/* WaStoreMultiplePTEenable:bxt */
1135 	/* This is a requirement according to Hardware specification */
1136 	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
1137 		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1138 
1139 	/* WaSetClckGatingDisableMedia:bxt */
1140 	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
1141 		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1142 					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1143 	}
1144 
1145 	/* WaDisableThreadStallDopClockGating:bxt */
1146 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1147 			  STALL_DOP_GATING_DISABLE);
1148 
1149 	/* WaDisableSbeCacheDispatchPortSharing:bxt */
1150 	if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
1151 		WA_SET_BIT_MASKED(
1152 			GEN7_HALF_SLICE_CHICKEN1,
1153 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1154 	}
1155 
1156 	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1157 	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1158 	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1159 	/* WaDisableLSQCROPERFforOCL:bxt */
1160 	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
1161 		ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
1162 		if (ret)
1163 			return ret;
1164 
1165 		ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
1166 		if (ret)
1167 			return ret;
1168 	}
1169 
1170 	return 0;
1171 }
1172 
1173 int init_workarounds_ring(struct intel_engine_cs *ring)
1174 {
1175 	struct drm_device *dev = ring->dev;
1176 	struct drm_i915_private *dev_priv = dev->dev_private;
1177 
1178 	WARN_ON(ring->id != RCS);
1179 
1180 	dev_priv->workarounds.count = 0;
1181 	dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
1182 
1183 	if (IS_BROADWELL(dev))
1184 		return bdw_init_workarounds(ring);
1185 
1186 	if (IS_CHERRYVIEW(dev))
1187 		return chv_init_workarounds(ring);
1188 
1189 	if (IS_SKYLAKE(dev))
1190 		return skl_init_workarounds(ring);
1191 
1192 	if (IS_BROXTON(dev))
1193 		return bxt_init_workarounds(ring);
1194 
1195 	return 0;
1196 }
1197 
1198 static int init_render_ring(struct intel_engine_cs *ring)
1199 {
1200 	struct drm_device *dev = ring->dev;
1201 	struct drm_i915_private *dev_priv = dev->dev_private;
1202 	int ret = init_ring_common(ring);
1203 	if (ret)
1204 		return ret;
1205 
1206 	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1207 	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
1208 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1209 
1210 	/* We need to disable the AsyncFlip performance optimisations in order
1211 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1212 	 * programmed to '1' on all products.
1213 	 *
1214 	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1215 	 */
1216 	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1217 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1218 
1219 	/* Required for the hardware to program scanline values for waiting */
1220 	/* WaEnableFlushTlbInvalidationMode:snb */
1221 	if (INTEL_INFO(dev)->gen == 6)
1222 		I915_WRITE(GFX_MODE,
1223 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
1224 
1225 	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1226 	if (IS_GEN7(dev))
1227 		I915_WRITE(GFX_MODE_GEN7,
1228 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1229 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
1230 
1231 	if (IS_GEN6(dev)) {
1232 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
1233 		 * "If this bit is set, STCunit will have LRA as replacement
1234 		 *  policy. [...] This bit must be reset.  LRA replacement
1235 		 *  policy is not supported."
1236 		 */
1237 		I915_WRITE(CACHE_MODE_0,
1238 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
1239 	}
1240 
1241 	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1242 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1243 
1244 	if (HAS_L3_DPF(dev))
1245 		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
1246 
1247 	return init_workarounds_ring(ring);
1248 }
1249 
1250 static void render_ring_cleanup(struct intel_engine_cs *ring)
1251 {
1252 	struct drm_device *dev = ring->dev;
1253 	struct drm_i915_private *dev_priv = dev->dev_private;
1254 
1255 	if (dev_priv->semaphore_obj) {
1256 		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1257 		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1258 		dev_priv->semaphore_obj = NULL;
1259 	}
1260 
1261 	intel_fini_pipe_control(ring);
1262 }
1263 
1264 static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
1265 			   unsigned int num_dwords)
1266 {
1267 #define MBOX_UPDATE_DWORDS 8
1268 	struct intel_engine_cs *signaller = signaller_req->ring;
1269 	struct drm_device *dev = signaller->dev;
1270 	struct drm_i915_private *dev_priv = dev->dev_private;
1271 	struct intel_engine_cs *waiter;
1272 	int i, ret, num_rings;
1273 
1274 	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1275 	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1276 #undef MBOX_UPDATE_DWORDS
1277 
1278 	ret = intel_ring_begin(signaller_req, num_dwords);
1279 	if (ret)
1280 		return ret;
1281 
1282 	for_each_ring(waiter, dev_priv, i) {
1283 		u32 seqno;
1284 		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1285 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1286 			continue;
1287 
1288 		seqno = i915_gem_request_get_seqno(signaller_req);
1289 		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1290 		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1291 					   PIPE_CONTROL_QW_WRITE |
1292 					   PIPE_CONTROL_FLUSH_ENABLE);
1293 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1294 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1295 		intel_ring_emit(signaller, seqno);
1296 		intel_ring_emit(signaller, 0);
1297 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1298 					   MI_SEMAPHORE_TARGET(waiter->id));
1299 		intel_ring_emit(signaller, 0);
1300 	}
1301 
1302 	return 0;
1303 }
1304 
1305 static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
1306 			   unsigned int num_dwords)
1307 {
1308 #define MBOX_UPDATE_DWORDS 6
1309 	struct intel_engine_cs *signaller = signaller_req->ring;
1310 	struct drm_device *dev = signaller->dev;
1311 	struct drm_i915_private *dev_priv = dev->dev_private;
1312 	struct intel_engine_cs *waiter;
1313 	int i, ret, num_rings;
1314 
1315 	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1316 	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1317 #undef MBOX_UPDATE_DWORDS
1318 
1319 	ret = intel_ring_begin(signaller_req, num_dwords);
1320 	if (ret)
1321 		return ret;
1322 
1323 	for_each_ring(waiter, dev_priv, i) {
1324 		u32 seqno;
1325 		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1326 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1327 			continue;
1328 
1329 		seqno = i915_gem_request_get_seqno(signaller_req);
1330 		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1331 					   MI_FLUSH_DW_OP_STOREDW);
1332 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1333 					   MI_FLUSH_DW_USE_GTT);
1334 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1335 		intel_ring_emit(signaller, seqno);
1336 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1337 					   MI_SEMAPHORE_TARGET(waiter->id));
1338 		intel_ring_emit(signaller, 0);
1339 	}
1340 
1341 	return 0;
1342 }
1343 
1344 static int gen6_signal(struct drm_i915_gem_request *signaller_req,
1345 		       unsigned int num_dwords)
1346 {
1347 	struct intel_engine_cs *signaller = signaller_req->ring;
1348 	struct drm_device *dev = signaller->dev;
1349 	struct drm_i915_private *dev_priv = dev->dev_private;
1350 	struct intel_engine_cs *useless;
1351 	int i, ret, num_rings;
1352 
1353 #define MBOX_UPDATE_DWORDS 3
1354 	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1355 	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1356 #undef MBOX_UPDATE_DWORDS
1357 
1358 	ret = intel_ring_begin(signaller_req, num_dwords);
1359 	if (ret)
1360 		return ret;
1361 
1362 	for_each_ring(useless, dev_priv, i) {
1363 		i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
1364 
1365 		if (i915_mmio_reg_valid(mbox_reg)) {
1366 			u32 seqno = i915_gem_request_get_seqno(signaller_req);
1367 
1368 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1369 			intel_ring_emit_reg(signaller, mbox_reg);
1370 			intel_ring_emit(signaller, seqno);
1371 		}
1372 	}
1373 
1374 	/* If num_dwords was rounded, make sure the tail pointer is correct */
1375 	if (num_rings % 2 == 0)
1376 		intel_ring_emit(signaller, MI_NOOP);
1377 
1378 	return 0;
1379 }
1380 
1381 /**
1382  * gen6_add_request - Update the semaphore mailbox registers
1383  *
1384  * @request - request to write to the ring
1385  *
1386  * Update the mailbox registers in the *other* rings with the current seqno.
1387  * This acts like a signal in the canonical semaphore.
1388  */
1389 static int
1390 gen6_add_request(struct drm_i915_gem_request *req)
1391 {
1392 	struct intel_engine_cs *ring = req->ring;
1393 	int ret;
1394 
1395 	if (ring->semaphore.signal)
1396 		ret = ring->semaphore.signal(req, 4);
1397 	else
1398 		ret = intel_ring_begin(req, 4);
1399 
1400 	if (ret)
1401 		return ret;
1402 
1403 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1404 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1405 	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1406 	intel_ring_emit(ring, MI_USER_INTERRUPT);
1407 	__intel_ring_advance(ring);
1408 
1409 	return 0;
1410 }
1411 
1412 static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1413 					      u32 seqno)
1414 {
1415 	struct drm_i915_private *dev_priv = dev->dev_private;
1416 	return dev_priv->last_seqno < seqno;
1417 }
1418 
1419 /**
1420  * intel_ring_sync - sync the waiter to the signaller on seqno
1421  *
1422  * @waiter - ring that is waiting
1423  * @signaller - ring which has, or will signal
1424  * @seqno - seqno which the waiter will block on
1425  */
1426 
1427 static int
1428 gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
1429 	       struct intel_engine_cs *signaller,
1430 	       u32 seqno)
1431 {
1432 	struct intel_engine_cs *waiter = waiter_req->ring;
1433 	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1434 	int ret;
1435 
1436 	ret = intel_ring_begin(waiter_req, 4);
1437 	if (ret)
1438 		return ret;
1439 
1440 	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1441 				MI_SEMAPHORE_GLOBAL_GTT |
1442 				MI_SEMAPHORE_POLL |
1443 				MI_SEMAPHORE_SAD_GTE_SDD);
1444 	intel_ring_emit(waiter, seqno);
1445 	intel_ring_emit(waiter,
1446 			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1447 	intel_ring_emit(waiter,
1448 			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1449 	intel_ring_advance(waiter);
1450 	return 0;
1451 }
1452 
1453 static int
1454 gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
1455 	       struct intel_engine_cs *signaller,
1456 	       u32 seqno)
1457 {
1458 	struct intel_engine_cs *waiter = waiter_req->ring;
1459 	u32 dw1 = MI_SEMAPHORE_MBOX |
1460 		  MI_SEMAPHORE_COMPARE |
1461 		  MI_SEMAPHORE_REGISTER;
1462 	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1463 	int ret;
1464 
1465 	/* Throughout all of the GEM code, seqno passed implies our current
1466 	 * seqno is >= the last seqno executed. However for hardware the
1467 	 * comparison is strictly greater than.
1468 	 */
1469 	seqno -= 1;
1470 
1471 	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1472 
1473 	ret = intel_ring_begin(waiter_req, 4);
1474 	if (ret)
1475 		return ret;
1476 
1477 	/* If seqno wrap happened, omit the wait with no-ops */
1478 	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
1479 		intel_ring_emit(waiter, dw1 | wait_mbox);
1480 		intel_ring_emit(waiter, seqno);
1481 		intel_ring_emit(waiter, 0);
1482 		intel_ring_emit(waiter, MI_NOOP);
1483 	} else {
1484 		intel_ring_emit(waiter, MI_NOOP);
1485 		intel_ring_emit(waiter, MI_NOOP);
1486 		intel_ring_emit(waiter, MI_NOOP);
1487 		intel_ring_emit(waiter, MI_NOOP);
1488 	}
1489 	intel_ring_advance(waiter);
1490 
1491 	return 0;
1492 }
1493 
/*
 * Emit one 4-dword PIPE_CONTROL with the qword-write and depth-stall flags
 * set, targeting addr__ in the global GTT; the two data dwords are zero.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__,						\
			GFX_OP_PIPE_CONTROL(4) |			\
			PIPE_CONTROL_QW_WRITE |				\
			PIPE_CONTROL_DEPTH_STALL);			\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
1502 
1503 static int
1504 pc_render_add_request(struct drm_i915_gem_request *req)
1505 {
1506 	struct intel_engine_cs *ring = req->ring;
1507 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1508 	int ret;
1509 
1510 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1511 	 * incoherent with writes to memory, i.e. completely fubar,
1512 	 * so we need to use PIPE_NOTIFY instead.
1513 	 *
1514 	 * However, we also need to workaround the qword write
1515 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1516 	 * memory before requesting an interrupt.
1517 	 */
1518 	ret = intel_ring_begin(req, 32);
1519 	if (ret)
1520 		return ret;
1521 
1522 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1523 			PIPE_CONTROL_WRITE_FLUSH |
1524 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
1525 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1526 	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1527 	intel_ring_emit(ring, 0);
1528 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1529 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1530 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1531 	scratch_addr += 2 * CACHELINE_BYTES;
1532 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1533 	scratch_addr += 2 * CACHELINE_BYTES;
1534 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1535 	scratch_addr += 2 * CACHELINE_BYTES;
1536 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1537 	scratch_addr += 2 * CACHELINE_BYTES;
1538 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1539 
1540 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1541 			PIPE_CONTROL_WRITE_FLUSH |
1542 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1543 			PIPE_CONTROL_NOTIFY);
1544 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1545 	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1546 	intel_ring_emit(ring, 0);
1547 	__intel_ring_advance(ring);
1548 
1549 	return 0;
1550 }
1551 
1552 static u32
1553 gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1554 {
1555 	/* Workaround to force correct ordering between irq and seqno writes on
1556 	 * ivb (and maybe also on snb) by reading from a CS register (like
1557 	 * ACTHD) before reading the status page. */
1558 	if (!lazy_coherency) {
1559 		struct drm_i915_private *dev_priv = ring->dev->dev_private;
1560 		POSTING_READ(RING_ACTHD(ring->mmio_base));
1561 	}
1562 
1563 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1564 }
1565 
1566 static u32
1567 ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1568 {
1569 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1570 }
1571 
1572 static void
1573 ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1574 {
1575 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1576 }
1577 
1578 static u32
1579 pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
1580 {
1581 	return ring->scratch.cpu_page[0];
1582 }
1583 
1584 static void
1585 pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1586 {
1587 	ring->scratch.cpu_page[0] = seqno;
1588 }
1589 
1590 static bool
1591 gen5_ring_get_irq(struct intel_engine_cs *ring)
1592 {
1593 	struct drm_device *dev = ring->dev;
1594 	struct drm_i915_private *dev_priv = dev->dev_private;
1595 	unsigned long flags;
1596 
1597 	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1598 		return false;
1599 
1600 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1601 	if (ring->irq_refcount++ == 0)
1602 		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
1603 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1604 
1605 	return true;
1606 }
1607 
1608 static void
1609 gen5_ring_put_irq(struct intel_engine_cs *ring)
1610 {
1611 	struct drm_device *dev = ring->dev;
1612 	struct drm_i915_private *dev_priv = dev->dev_private;
1613 	unsigned long flags;
1614 
1615 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1616 	if (--ring->irq_refcount == 0)
1617 		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1618 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1619 }
1620 
1621 static bool
1622 i9xx_ring_get_irq(struct intel_engine_cs *ring)
1623 {
1624 	struct drm_device *dev = ring->dev;
1625 	struct drm_i915_private *dev_priv = dev->dev_private;
1626 	unsigned long flags;
1627 
1628 	if (!intel_irqs_enabled(dev_priv))
1629 		return false;
1630 
1631 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1632 	if (ring->irq_refcount++ == 0) {
1633 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
1634 		I915_WRITE(IMR, dev_priv->irq_mask);
1635 		POSTING_READ(IMR);
1636 	}
1637 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1638 
1639 	return true;
1640 }
1641 
1642 static void
1643 i9xx_ring_put_irq(struct intel_engine_cs *ring)
1644 {
1645 	struct drm_device *dev = ring->dev;
1646 	struct drm_i915_private *dev_priv = dev->dev_private;
1647 	unsigned long flags;
1648 
1649 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1650 	if (--ring->irq_refcount == 0) {
1651 		dev_priv->irq_mask |= ring->irq_enable_mask;
1652 		I915_WRITE(IMR, dev_priv->irq_mask);
1653 		POSTING_READ(IMR);
1654 	}
1655 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1656 }
1657 
1658 static bool
1659 i8xx_ring_get_irq(struct intel_engine_cs *ring)
1660 {
1661 	struct drm_device *dev = ring->dev;
1662 	struct drm_i915_private *dev_priv = dev->dev_private;
1663 	unsigned long flags;
1664 
1665 	if (!intel_irqs_enabled(dev_priv))
1666 		return false;
1667 
1668 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1669 	if (ring->irq_refcount++ == 0) {
1670 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
1671 		I915_WRITE16(IMR, dev_priv->irq_mask);
1672 		POSTING_READ16(IMR);
1673 	}
1674 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1675 
1676 	return true;
1677 }
1678 
1679 static void
1680 i8xx_ring_put_irq(struct intel_engine_cs *ring)
1681 {
1682 	struct drm_device *dev = ring->dev;
1683 	struct drm_i915_private *dev_priv = dev->dev_private;
1684 	unsigned long flags;
1685 
1686 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1687 	if (--ring->irq_refcount == 0) {
1688 		dev_priv->irq_mask |= ring->irq_enable_mask;
1689 		I915_WRITE16(IMR, dev_priv->irq_mask);
1690 		POSTING_READ16(IMR);
1691 	}
1692 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1693 }
1694 
1695 static int
1696 bsd_ring_flush(struct drm_i915_gem_request *req,
1697 	       u32     invalidate_domains,
1698 	       u32     flush_domains)
1699 {
1700 	struct intel_engine_cs *ring = req->ring;
1701 	int ret;
1702 
1703 	ret = intel_ring_begin(req, 2);
1704 	if (ret)
1705 		return ret;
1706 
1707 	intel_ring_emit(ring, MI_FLUSH);
1708 	intel_ring_emit(ring, MI_NOOP);
1709 	intel_ring_advance(ring);
1710 	return 0;
1711 }
1712 
1713 static int
1714 i9xx_add_request(struct drm_i915_gem_request *req)
1715 {
1716 	struct intel_engine_cs *ring = req->ring;
1717 	int ret;
1718 
1719 	ret = intel_ring_begin(req, 4);
1720 	if (ret)
1721 		return ret;
1722 
1723 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1724 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1725 	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1726 	intel_ring_emit(ring, MI_USER_INTERRUPT);
1727 	__intel_ring_advance(ring);
1728 
1729 	return 0;
1730 }
1731 
1732 static bool
1733 gen6_ring_get_irq(struct intel_engine_cs *ring)
1734 {
1735 	struct drm_device *dev = ring->dev;
1736 	struct drm_i915_private *dev_priv = dev->dev_private;
1737 	unsigned long flags;
1738 
1739 	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1740 		return false;
1741 
1742 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1743 	if (ring->irq_refcount++ == 0) {
1744 		if (HAS_L3_DPF(dev) && ring->id == RCS)
1745 			I915_WRITE_IMR(ring,
1746 				       ~(ring->irq_enable_mask |
1747 					 GT_PARITY_ERROR(dev)));
1748 		else
1749 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1750 		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
1751 	}
1752 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1753 
1754 	return true;
1755 }
1756 
1757 static void
1758 gen6_ring_put_irq(struct intel_engine_cs *ring)
1759 {
1760 	struct drm_device *dev = ring->dev;
1761 	struct drm_i915_private *dev_priv = dev->dev_private;
1762 	unsigned long flags;
1763 
1764 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1765 	if (--ring->irq_refcount == 0) {
1766 		if (HAS_L3_DPF(dev) && ring->id == RCS)
1767 			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
1768 		else
1769 			I915_WRITE_IMR(ring, ~0);
1770 		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
1771 	}
1772 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1773 }
1774 
1775 static bool
1776 hsw_vebox_get_irq(struct intel_engine_cs *ring)
1777 {
1778 	struct drm_device *dev = ring->dev;
1779 	struct drm_i915_private *dev_priv = dev->dev_private;
1780 	unsigned long flags;
1781 
1782 	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1783 		return false;
1784 
1785 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1786 	if (ring->irq_refcount++ == 0) {
1787 		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1788 		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
1789 	}
1790 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1791 
1792 	return true;
1793 }
1794 
1795 static void
1796 hsw_vebox_put_irq(struct intel_engine_cs *ring)
1797 {
1798 	struct drm_device *dev = ring->dev;
1799 	struct drm_i915_private *dev_priv = dev->dev_private;
1800 	unsigned long flags;
1801 
1802 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1803 	if (--ring->irq_refcount == 0) {
1804 		I915_WRITE_IMR(ring, ~0);
1805 		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
1806 	}
1807 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1808 }
1809 
1810 static bool
1811 gen8_ring_get_irq(struct intel_engine_cs *ring)
1812 {
1813 	struct drm_device *dev = ring->dev;
1814 	struct drm_i915_private *dev_priv = dev->dev_private;
1815 	unsigned long flags;
1816 
1817 	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1818 		return false;
1819 
1820 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1821 	if (ring->irq_refcount++ == 0) {
1822 		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1823 			I915_WRITE_IMR(ring,
1824 				       ~(ring->irq_enable_mask |
1825 					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1826 		} else {
1827 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1828 		}
1829 		POSTING_READ(RING_IMR(ring->mmio_base));
1830 	}
1831 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1832 
1833 	return true;
1834 }
1835 
1836 static void
1837 gen8_ring_put_irq(struct intel_engine_cs *ring)
1838 {
1839 	struct drm_device *dev = ring->dev;
1840 	struct drm_i915_private *dev_priv = dev->dev_private;
1841 	unsigned long flags;
1842 
1843 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1844 	if (--ring->irq_refcount == 0) {
1845 		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1846 			I915_WRITE_IMR(ring,
1847 				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1848 		} else {
1849 			I915_WRITE_IMR(ring, ~0);
1850 		}
1851 		POSTING_READ(RING_IMR(ring->mmio_base));
1852 	}
1853 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1854 }
1855 
1856 static int
1857 i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1858 			 u64 offset, u32 length,
1859 			 unsigned dispatch_flags)
1860 {
1861 	struct intel_engine_cs *ring = req->ring;
1862 	int ret;
1863 
1864 	ret = intel_ring_begin(req, 2);
1865 	if (ret)
1866 		return ret;
1867 
1868 	intel_ring_emit(ring,
1869 			MI_BATCH_BUFFER_START |
1870 			MI_BATCH_GTT |
1871 			(dispatch_flags & I915_DISPATCH_SECURE ?
1872 			 0 : MI_BATCH_NON_SECURE_I965));
1873 	intel_ring_emit(ring, offset);
1874 	intel_ring_advance(ring);
1875 
1876 	return 0;
1877 }
1878 
1879 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
1880 #define I830_BATCH_LIMIT (256*1024)
1881 #define I830_TLB_ENTRIES (2)
1882 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1883 static int
1884 i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1885 			 u64 offset, u32 len,
1886 			 unsigned dispatch_flags)
1887 {
1888 	struct intel_engine_cs *ring = req->ring;
1889 	u32 cs_offset = ring->scratch.gtt_offset;
1890 	int ret;
1891 
1892 	ret = intel_ring_begin(req, 6);
1893 	if (ret)
1894 		return ret;
1895 
1896 	/* Evict the invalid PTE TLBs */
1897 	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1898 	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1899 	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1900 	intel_ring_emit(ring, cs_offset);
1901 	intel_ring_emit(ring, 0xdeadbeef);
1902 	intel_ring_emit(ring, MI_NOOP);
1903 	intel_ring_advance(ring);
1904 
1905 	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1906 		if (len > I830_BATCH_LIMIT)
1907 			return -ENOSPC;
1908 
1909 		ret = intel_ring_begin(req, 6 + 2);
1910 		if (ret)
1911 			return ret;
1912 
1913 		/* Blit the batch (which has now all relocs applied) to the
1914 		 * stable batch scratch bo area (so that the CS never
1915 		 * stumbles over its tlb invalidation bug) ...
1916 		 */
1917 		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1918 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
1919 		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
1920 		intel_ring_emit(ring, cs_offset);
1921 		intel_ring_emit(ring, 4096);
1922 		intel_ring_emit(ring, offset);
1923 
1924 		intel_ring_emit(ring, MI_FLUSH);
1925 		intel_ring_emit(ring, MI_NOOP);
1926 		intel_ring_advance(ring);
1927 
1928 		/* ... and execute it. */
1929 		offset = cs_offset;
1930 	}
1931 
1932 	ret = intel_ring_begin(req, 2);
1933 	if (ret)
1934 		return ret;
1935 
1936 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1937 	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1938 					0 : MI_BATCH_NON_SECURE));
1939 	intel_ring_advance(ring);
1940 
1941 	return 0;
1942 }
1943 
1944 static int
1945 i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
1946 			 u64 offset, u32 len,
1947 			 unsigned dispatch_flags)
1948 {
1949 	struct intel_engine_cs *ring = req->ring;
1950 	int ret;
1951 
1952 	ret = intel_ring_begin(req, 2);
1953 	if (ret)
1954 		return ret;
1955 
1956 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1957 	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1958 					0 : MI_BATCH_NON_SECURE));
1959 	intel_ring_advance(ring);
1960 
1961 	return 0;
1962 }
1963 
1964 static void cleanup_phys_status_page(struct intel_engine_cs *ring)
1965 {
1966 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
1967 
1968 	if (!dev_priv->status_page_dmah)
1969 		return;
1970 
1971 	drm_pci_free(ring->dev, dev_priv->status_page_dmah);
1972 	ring->status_page.page_addr = NULL;
1973 }
1974 
1975 static void cleanup_status_page(struct intel_engine_cs *ring)
1976 {
1977 	struct drm_i915_gem_object *obj;
1978 
1979 	obj = ring->status_page.obj;
1980 	if (obj == NULL)
1981 		return;
1982 
1983 	kunmap(sg_page(obj->pages->sgl));
1984 	i915_gem_object_ggtt_unpin(obj);
1985 	drm_gem_object_unreference(&obj->base);
1986 	ring->status_page.obj = NULL;
1987 }
1988 
1989 static int init_status_page(struct intel_engine_cs *ring)
1990 {
1991 	struct drm_i915_gem_object *obj = ring->status_page.obj;
1992 
1993 	if (obj == NULL) {
1994 		unsigned flags;
1995 		int ret;
1996 
1997 		obj = i915_gem_alloc_object(ring->dev, 4096);
1998 		if (obj == NULL) {
1999 			DRM_ERROR("Failed to allocate status page\n");
2000 			return -ENOMEM;
2001 		}
2002 
2003 		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2004 		if (ret)
2005 			goto err_unref;
2006 
2007 		flags = 0;
2008 		if (!HAS_LLC(ring->dev))
2009 			/* On g33, we cannot place HWS above 256MiB, so
2010 			 * restrict its pinning to the low mappable arena.
2011 			 * Though this restriction is not documented for
2012 			 * gen4, gen5, or byt, they also behave similarly
2013 			 * and hang if the HWS is placed at the top of the
2014 			 * GTT. To generalise, it appears that all !llc
2015 			 * platforms have issues with us placing the HWS
2016 			 * above the mappable region (even though we never
2017 			 * actualy map it).
2018 			 */
2019 			flags |= PIN_MAPPABLE;
2020 		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
2021 		if (ret) {
2022 err_unref:
2023 			drm_gem_object_unreference(&obj->base);
2024 			return ret;
2025 		}
2026 
2027 		ring->status_page.obj = obj;
2028 	}
2029 
2030 	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
2031 	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
2032 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2033 
2034 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
2035 			ring->name, ring->status_page.gfx_addr);
2036 
2037 	return 0;
2038 }
2039 
2040 static int init_phys_status_page(struct intel_engine_cs *ring)
2041 {
2042 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2043 
2044 	if (!dev_priv->status_page_dmah) {
2045 		dev_priv->status_page_dmah =
2046 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
2047 		if (!dev_priv->status_page_dmah)
2048 			return -ENOMEM;
2049 	}
2050 
2051 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2052 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
2053 
2054 	return 0;
2055 }
2056 
2057 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2058 {
2059 	if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
2060 		vunmap(ringbuf->virtual_start, ringbuf->virtual_count);
2061 	else
2062 		iounmap(ringbuf->virtual_start);
2063 	ringbuf->virtual_start = NULL;
2064 	ringbuf->vma = NULL;
2065 	i915_gem_object_ggtt_unpin(ringbuf->obj);
2066 }
2067 
2068 static u32 *vmap_obj(struct drm_i915_gem_object *obj, unsigned int *countp)
2069 {
2070 	struct sg_page_iter sg_iter;
2071 	struct vm_page **pages;
2072 	void *addr;
2073 	int i;
2074 
2075 	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
2076 	if (pages == NULL)
2077 		return NULL;
2078 
2079 	i = 0;
2080 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
2081 		pages[i++] = sg_page_iter_page(&sg_iter);
2082 	*countp = i;
2083 
2084 	addr = vmap(pages, i, 0, PAGE_KERNEL);
2085 	drm_free_large(pages);
2086 
2087 	return addr;
2088 }
2089 
2090 int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
2091 				     struct intel_ringbuffer *ringbuf)
2092 {
2093 	struct drm_i915_private *dev_priv = to_i915(dev);
2094 	struct drm_i915_gem_object *obj = ringbuf->obj;
2095 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
2096 	unsigned flags = PIN_OFFSET_BIAS | 4096;
2097 	int ret;
2098 
2099 	if (HAS_LLC(dev_priv) && !obj->stolen) {
2100 		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
2101 		if (ret)
2102 			return ret;
2103 
2104 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
2105 		if (ret) {
2106 			i915_gem_object_ggtt_unpin(obj);
2107 			return ret;
2108 		}
2109 
2110 		ringbuf->virtual_start = (char *)vmap_obj(obj,
2111 						    &ringbuf->virtual_count);
2112 		if (ringbuf->virtual_start == NULL) {
2113 			i915_gem_object_ggtt_unpin(obj);
2114 			return -ENOMEM;
2115 		}
2116 	} else {
2117 		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
2118 					    flags | PIN_MAPPABLE);
2119 		if (ret)
2120 			return ret;
2121 
2122 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
2123 		if (ret) {
2124 			i915_gem_object_ggtt_unpin(obj);
2125 			return ret;
2126 		}
2127 
2128 		/* Access through the GTT requires the device to be awake. */
2129 		assert_rpm_wakelock_held(dev_priv);
2130 
2131 		ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
2132 						    i915_gem_obj_ggtt_offset(obj), ringbuf->size);
2133 		if (ringbuf->virtual_start == NULL) {
2134 			i915_gem_object_ggtt_unpin(obj);
2135 			return -EINVAL;
2136 		}
2137 	}
2138 
2139 	ringbuf->vma = i915_gem_obj_to_ggtt(obj);
2140 
2141 	return 0;
2142 }
2143 
2144 static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2145 {
2146 	drm_gem_object_unreference(&ringbuf->obj->base);
2147 	ringbuf->obj = NULL;
2148 }
2149 
2150 static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2151 				      struct intel_ringbuffer *ringbuf)
2152 {
2153 	struct drm_i915_gem_object *obj;
2154 
2155 	obj = NULL;
2156 	if (!HAS_LLC(dev))
2157 		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
2158 	if (obj == NULL)
2159 		obj = i915_gem_alloc_object(dev, ringbuf->size);
2160 	if (obj == NULL)
2161 		return -ENOMEM;
2162 
2163 	/* mark ring buffers as read-only from GPU side by default */
2164 	obj->gt_ro = 1;
2165 
2166 	ringbuf->obj = obj;
2167 
2168 	return 0;
2169 }
2170 
2171 struct intel_ringbuffer *
2172 intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2173 {
2174 	struct intel_ringbuffer *ring;
2175 	int ret;
2176 
2177 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2178 	if (ring == NULL) {
2179 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2180 				 engine->name);
2181 		return ERR_PTR(-ENOMEM);
2182 	}
2183 
2184 	ring->ring = engine;
2185 	list_add(&ring->link, &engine->buffers);
2186 
2187 	ring->size = size;
2188 	/* Workaround an erratum on the i830 which causes a hang if
2189 	 * the TAIL pointer points to within the last 2 cachelines
2190 	 * of the buffer.
2191 	 */
2192 	ring->effective_size = size;
2193 	if (IS_I830(engine->dev) || IS_845G(engine->dev))
2194 		ring->effective_size -= 2 * CACHELINE_BYTES;
2195 
2196 	ring->last_retired_head = -1;
2197 	intel_ring_update_space(ring);
2198 
2199 	ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2200 	if (ret) {
2201 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
2202 				 engine->name, ret);
2203 		list_del(&ring->link);
2204 		kfree(ring);
2205 		return ERR_PTR(ret);
2206 	}
2207 
2208 	return ring;
2209 }
2210 
2211 void
2212 intel_ringbuffer_free(struct intel_ringbuffer *ring)
2213 {
2214 	intel_destroy_ringbuffer_obj(ring);
2215 	list_del(&ring->link);
2216 	kfree(ring);
2217 }
2218 
2219 static int intel_init_ring_buffer(struct drm_device *dev,
2220 				  struct intel_engine_cs *ring)
2221 {
2222 	struct intel_ringbuffer *ringbuf;
2223 	int ret;
2224 
2225 	WARN_ON(ring->buffer);
2226 
2227 	ring->dev = dev;
2228 	INIT_LIST_HEAD(&ring->active_list);
2229 	INIT_LIST_HEAD(&ring->request_list);
2230 	INIT_LIST_HEAD(&ring->execlist_queue);
2231 	INIT_LIST_HEAD(&ring->buffers);
2232 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
2233 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
2234 
2235 	init_waitqueue_head(&ring->irq_queue);
2236 
2237 	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
2238 	if (IS_ERR(ringbuf)) {
2239 		ret = PTR_ERR(ringbuf);
2240 		goto error;
2241 	}
2242 	ring->buffer = ringbuf;
2243 
2244 	if (I915_NEED_GFX_HWS(dev)) {
2245 		ret = init_status_page(ring);
2246 		if (ret)
2247 			goto error;
2248 	} else {
2249 		WARN_ON(ring->id != RCS);
2250 		ret = init_phys_status_page(ring);
2251 		if (ret)
2252 			goto error;
2253 	}
2254 
2255 	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2256 	if (ret) {
2257 		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2258 				ring->name, ret);
2259 		intel_destroy_ringbuffer_obj(ringbuf);
2260 		goto error;
2261 	}
2262 
2263 	ret = i915_cmd_parser_init_ring(ring);
2264 	if (ret)
2265 		goto error;
2266 
2267 	return 0;
2268 
2269 error:
2270 	intel_cleanup_ring_buffer(ring);
2271 	return ret;
2272 }
2273 
2274 void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
2275 {
2276 	struct drm_i915_private *dev_priv;
2277 
2278 	if (!intel_ring_initialized(ring))
2279 		return;
2280 
2281 	dev_priv = to_i915(ring->dev);
2282 
2283 	if (ring->buffer) {
2284 		intel_stop_ring_buffer(ring);
2285 		WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
2286 
2287 		intel_unpin_ringbuffer_obj(ring->buffer);
2288 		intel_ringbuffer_free(ring->buffer);
2289 		ring->buffer = NULL;
2290 	}
2291 
2292 	if (ring->cleanup)
2293 		ring->cleanup(ring);
2294 
2295 	if (I915_NEED_GFX_HWS(ring->dev)) {
2296 		cleanup_status_page(ring);
2297 	} else {
2298 		WARN_ON(ring->id != RCS);
2299 		cleanup_phys_status_page(ring);
2300 	}
2301 
2302 	i915_cmd_parser_fini_ring(ring);
2303 	i915_gem_batch_pool_fini(&ring->batch_pool);
2304 	ring->dev = NULL;
2305 }
2306 
2307 static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
2308 {
2309 	struct intel_ringbuffer *ringbuf = ring->buffer;
2310 	struct drm_i915_gem_request *request;
2311 	unsigned space;
2312 	int ret;
2313 
2314 	if (intel_ring_space(ringbuf) >= n)
2315 		return 0;
2316 
2317 	/* The whole point of reserving space is to not wait! */
2318 	WARN_ON(ringbuf->reserved_in_use);
2319 
2320 	list_for_each_entry(request, &ring->request_list, list) {
2321 		space = __intel_ring_space(request->postfix, ringbuf->tail,
2322 					   ringbuf->size);
2323 		if (space >= n)
2324 			break;
2325 	}
2326 
2327 	if (WARN_ON(&request->list == &ring->request_list))
2328 		return -ENOSPC;
2329 
2330 	ret = i915_wait_request(request);
2331 	if (ret)
2332 		return ret;
2333 
2334 	ringbuf->space = space;
2335 	return 0;
2336 }
2337 
2338 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
2339 {
2340 	uint32_t __iomem *virt;
2341 	int rem = ringbuf->size - ringbuf->tail;
2342 
2343 	virt = (unsigned int *)((char *)ringbuf->virtual_start + ringbuf->tail);
2344 	rem /= 4;
2345 	while (rem--)
2346 		iowrite32(MI_NOOP, virt++);
2347 
2348 	ringbuf->tail = 0;
2349 	intel_ring_update_space(ringbuf);
2350 }
2351 
2352 int intel_ring_idle(struct intel_engine_cs *ring)
2353 {
2354 	struct drm_i915_gem_request *req;
2355 
2356 	/* Wait upon the last request to be completed */
2357 	if (list_empty(&ring->request_list))
2358 		return 0;
2359 
2360 	req = list_entry(ring->request_list.prev,
2361 			struct drm_i915_gem_request,
2362 			list);
2363 
2364 	/* Make sure we do not trigger any retires */
2365 	return __i915_wait_request(req,
2366 				   atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2367 				   to_i915(ring->dev)->mm.interruptible,
2368 				   NULL, NULL);
2369 }
2370 
2371 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2372 {
2373 	request->ringbuf = request->ring->buffer;
2374 	return 0;
2375 }
2376 
2377 int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2378 {
2379 	/*
2380 	 * The first call merely notes the reserve request and is common for
2381 	 * all back ends. The subsequent localised _begin() call actually
2382 	 * ensures that the reservation is available. Without the begin, if
2383 	 * the request creator immediately submitted the request without
2384 	 * adding any commands to it then there might not actually be
2385 	 * sufficient room for the submission commands.
2386 	 */
2387 	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
2388 
2389 	return intel_ring_begin(request, 0);
2390 }
2391 
2392 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2393 {
2394 	WARN_ON(ringbuf->reserved_size);
2395 	WARN_ON(ringbuf->reserved_in_use);
2396 
2397 	ringbuf->reserved_size = size;
2398 }
2399 
2400 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2401 {
2402 	WARN_ON(ringbuf->reserved_in_use);
2403 
2404 	ringbuf->reserved_size   = 0;
2405 	ringbuf->reserved_in_use = false;
2406 }
2407 
2408 void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2409 {
2410 	WARN_ON(ringbuf->reserved_in_use);
2411 
2412 	ringbuf->reserved_in_use = true;
2413 	ringbuf->reserved_tail   = ringbuf->tail;
2414 }
2415 
2416 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2417 {
2418 	WARN_ON(!ringbuf->reserved_in_use);
2419 	if (ringbuf->tail > ringbuf->reserved_tail) {
2420 		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2421 		     "request reserved size too small: %d vs %d!\n",
2422 		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2423 	} else {
2424 		/*
2425 		 * The ring was wrapped while the reserved space was in use.
2426 		 * That means that some unknown amount of the ring tail was
2427 		 * no-op filled and skipped. Thus simply adding the ring size
2428 		 * to the tail and doing the above space check will not work.
2429 		 * Rather than attempt to track how much tail was skipped,
2430 		 * it is much simpler to say that also skipping the sanity
2431 		 * check every once in a while is not a big issue.
2432 		 */
2433 	}
2434 
2435 	ringbuf->reserved_size   = 0;
2436 	ringbuf->reserved_in_use = false;
2437 }
2438 
2439 static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
2440 {
2441 	struct intel_ringbuffer *ringbuf = ring->buffer;
2442 	int remain_usable = ringbuf->effective_size - ringbuf->tail;
2443 	int remain_actual = ringbuf->size - ringbuf->tail;
2444 	int ret, total_bytes, wait_bytes = 0;
2445 	bool need_wrap = false;
2446 
2447 	if (ringbuf->reserved_in_use)
2448 		total_bytes = bytes;
2449 	else
2450 		total_bytes = bytes + ringbuf->reserved_size;
2451 
2452 	if (unlikely(bytes > remain_usable)) {
2453 		/*
2454 		 * Not enough space for the basic request. So need to flush
2455 		 * out the remainder and then wait for base + reserved.
2456 		 */
2457 		wait_bytes = remain_actual + total_bytes;
2458 		need_wrap = true;
2459 	} else {
2460 		if (unlikely(total_bytes > remain_usable)) {
2461 			/*
2462 			 * The base request will fit but the reserved space
2463 			 * falls off the end. So don't need an immediate wrap
2464 			 * and only need to effectively wait for the reserved
2465 			 * size space from the start of ringbuffer.
2466 			 */
2467 			wait_bytes = remain_actual + ringbuf->reserved_size;
2468 		} else if (total_bytes > ringbuf->space) {
2469 			/* No wrapping required, just waiting. */
2470 			wait_bytes = total_bytes;
2471 		}
2472 	}
2473 
2474 	if (wait_bytes) {
2475 		ret = ring_wait_for_space(ring, wait_bytes);
2476 		if (unlikely(ret))
2477 			return ret;
2478 
2479 		if (need_wrap)
2480 			__wrap_ring_buffer(ringbuf);
2481 	}
2482 
2483 	return 0;
2484 }
2485 
2486 int intel_ring_begin(struct drm_i915_gem_request *req,
2487 		     int num_dwords)
2488 {
2489 	struct intel_engine_cs *ring;
2490 	struct drm_i915_private *dev_priv;
2491 	int ret;
2492 
2493 	WARN_ON(req == NULL);
2494 	ring = req->ring;
2495 	dev_priv = ring->dev->dev_private;
2496 
2497 	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2498 				   dev_priv->mm.interruptible);
2499 	if (ret)
2500 		return ret;
2501 
2502 	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2503 	if (ret)
2504 		return ret;
2505 
2506 	ring->buffer->space -= num_dwords * sizeof(uint32_t);
2507 	return 0;
2508 }
2509 
2510 /* Align the ring tail to a cacheline boundary */
2511 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2512 {
2513 	struct intel_engine_cs *ring = req->ring;
2514 	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2515 	int ret;
2516 
2517 	if (num_dwords == 0)
2518 		return 0;
2519 
2520 	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
2521 	ret = intel_ring_begin(req, num_dwords);
2522 	if (ret)
2523 		return ret;
2524 
2525 	while (num_dwords--)
2526 		intel_ring_emit(ring, MI_NOOP);
2527 
2528 	intel_ring_advance(ring);
2529 
2530 	return 0;
2531 }
2532 
2533 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
2534 {
2535 	struct drm_device *dev = ring->dev;
2536 	struct drm_i915_private *dev_priv = dev->dev_private;
2537 
2538 	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
2539 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2540 		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
2541 		if (HAS_VEBOX(dev))
2542 			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
2543 	}
2544 
2545 	ring->set_seqno(ring, seqno);
2546 	ring->hangcheck.seqno = seqno;
2547 }
2548 
2549 static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
2550 				     u32 value)
2551 {
2552 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2553 
2554        /* Every tail move must follow the sequence below */
2555 
2556 	/* Disable notification that the ring is IDLE. The GT
2557 	 * will then assume that it is busy and bring it out of rc6.
2558 	 */
2559 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2560 		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2561 
2562 	/* Clear the context id. Here be magic! */
2563 	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
2564 
2565 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
2566 	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
2567 		      GEN6_BSD_SLEEP_INDICATOR) == 0,
2568 		     50))
2569 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2570 
2571 	/* Now that the ring is fully powered up, update the tail */
2572 	I915_WRITE_TAIL(ring, value);
2573 	POSTING_READ(RING_TAIL(ring->mmio_base));
2574 
2575 	/* Let the ring send IDLE messages to the GT again,
2576 	 * and so let it sleep to conserve power when idle.
2577 	 */
2578 	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2579 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2580 }
2581 
2582 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2583 			       u32 invalidate, u32 flush)
2584 {
2585 	struct intel_engine_cs *ring = req->ring;
2586 	uint32_t cmd;
2587 	int ret;
2588 
2589 	ret = intel_ring_begin(req, 4);
2590 	if (ret)
2591 		return ret;
2592 
2593 	cmd = MI_FLUSH_DW;
2594 	if (INTEL_INFO(ring->dev)->gen >= 8)
2595 		cmd += 1;
2596 
2597 	/* We always require a command barrier so that subsequent
2598 	 * commands, such as breadcrumb interrupts, are strictly ordered
2599 	 * wrt the contents of the write cache being flushed to memory
2600 	 * (and thus being coherent from the CPU).
2601 	 */
2602 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2603 
2604 	/*
2605 	 * Bspec vol 1c.5 - video engine command streamer:
2606 	 * "If ENABLED, all TLBs will be invalidated once the flush
2607 	 * operation is complete. This bit is only valid when the
2608 	 * Post-Sync Operation field is a value of 1h or 3h."
2609 	 */
2610 	if (invalidate & I915_GEM_GPU_DOMAINS)
2611 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2612 
2613 	intel_ring_emit(ring, cmd);
2614 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2615 	if (INTEL_INFO(ring->dev)->gen >= 8) {
2616 		intel_ring_emit(ring, 0); /* upper addr */
2617 		intel_ring_emit(ring, 0); /* value */
2618 	} else  {
2619 		intel_ring_emit(ring, 0);
2620 		intel_ring_emit(ring, MI_NOOP);
2621 	}
2622 	intel_ring_advance(ring);
2623 	return 0;
2624 }
2625 
2626 static int
2627 gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2628 			      u64 offset, u32 len,
2629 			      unsigned dispatch_flags)
2630 {
2631 	struct intel_engine_cs *ring = req->ring;
2632 	bool ppgtt = USES_PPGTT(ring->dev) &&
2633 			!(dispatch_flags & I915_DISPATCH_SECURE);
2634 	int ret;
2635 
2636 	ret = intel_ring_begin(req, 4);
2637 	if (ret)
2638 		return ret;
2639 
2640 	/* FIXME(BDW): Address space and security selectors. */
2641 	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2642 			(dispatch_flags & I915_DISPATCH_RS ?
2643 			 MI_BATCH_RESOURCE_STREAMER : 0));
2644 	intel_ring_emit(ring, lower_32_bits(offset));
2645 	intel_ring_emit(ring, upper_32_bits(offset));
2646 	intel_ring_emit(ring, MI_NOOP);
2647 	intel_ring_advance(ring);
2648 
2649 	return 0;
2650 }
2651 
2652 static int
2653 hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2654 			     u64 offset, u32 len,
2655 			     unsigned dispatch_flags)
2656 {
2657 	struct intel_engine_cs *ring = req->ring;
2658 	int ret;
2659 
2660 	ret = intel_ring_begin(req, 2);
2661 	if (ret)
2662 		return ret;
2663 
2664 	intel_ring_emit(ring,
2665 			MI_BATCH_BUFFER_START |
2666 			(dispatch_flags & I915_DISPATCH_SECURE ?
2667 			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2668 			(dispatch_flags & I915_DISPATCH_RS ?
2669 			 MI_BATCH_RESOURCE_STREAMER : 0));
2670 	/* bit0-7 is the length on GEN6+ */
2671 	intel_ring_emit(ring, offset);
2672 	intel_ring_advance(ring);
2673 
2674 	return 0;
2675 }
2676 
2677 static int
2678 gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2679 			      u64 offset, u32 len,
2680 			      unsigned dispatch_flags)
2681 {
2682 	struct intel_engine_cs *ring = req->ring;
2683 	int ret;
2684 
2685 	ret = intel_ring_begin(req, 2);
2686 	if (ret)
2687 		return ret;
2688 
2689 	intel_ring_emit(ring,
2690 			MI_BATCH_BUFFER_START |
2691 			(dispatch_flags & I915_DISPATCH_SECURE ?
2692 			 0 : MI_BATCH_NON_SECURE_I965));
2693 	/* bit0-7 is the length on GEN6+ */
2694 	intel_ring_emit(ring, offset);
2695 	intel_ring_advance(ring);
2696 
2697 	return 0;
2698 }
2699 
2700 /* Blitter support (SandyBridge+) */
2701 
2702 static int gen6_ring_flush(struct drm_i915_gem_request *req,
2703 			   u32 invalidate, u32 flush)
2704 {
2705 	struct intel_engine_cs *ring = req->ring;
2706 	struct drm_device *dev = ring->dev;
2707 	uint32_t cmd;
2708 	int ret;
2709 
2710 	ret = intel_ring_begin(req, 4);
2711 	if (ret)
2712 		return ret;
2713 
2714 	cmd = MI_FLUSH_DW;
2715 	if (INTEL_INFO(dev)->gen >= 8)
2716 		cmd += 1;
2717 
2718 	/* We always require a command barrier so that subsequent
2719 	 * commands, such as breadcrumb interrupts, are strictly ordered
2720 	 * wrt the contents of the write cache being flushed to memory
2721 	 * (and thus being coherent from the CPU).
2722 	 */
2723 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2724 
2725 	/*
2726 	 * Bspec vol 1c.3 - blitter engine command streamer:
2727 	 * "If ENABLED, all TLBs will be invalidated once the flush
2728 	 * operation is complete. This bit is only valid when the
2729 	 * Post-Sync Operation field is a value of 1h or 3h."
2730 	 */
2731 	if (invalidate & I915_GEM_DOMAIN_RENDER)
2732 		cmd |= MI_INVALIDATE_TLB;
2733 	intel_ring_emit(ring, cmd);
2734 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2735 	if (INTEL_INFO(dev)->gen >= 8) {
2736 		intel_ring_emit(ring, 0); /* upper addr */
2737 		intel_ring_emit(ring, 0); /* value */
2738 	} else  {
2739 		intel_ring_emit(ring, 0);
2740 		intel_ring_emit(ring, MI_NOOP);
2741 	}
2742 	intel_ring_advance(ring);
2743 
2744 	return 0;
2745 }
2746 
2747 int intel_init_render_ring_buffer(struct drm_device *dev)
2748 {
2749 	struct drm_i915_private *dev_priv = dev->dev_private;
2750 	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
2751 	struct drm_i915_gem_object *obj;
2752 	int ret;
2753 
2754 	ring->name = "render ring";
2755 	ring->id = RCS;
2756 	ring->exec_id = I915_EXEC_RENDER;
2757 	ring->mmio_base = RENDER_RING_BASE;
2758 
2759 	if (INTEL_INFO(dev)->gen >= 8) {
2760 		if (i915_semaphore_is_enabled(dev)) {
2761 			obj = i915_gem_alloc_object(dev, 4096);
2762 			if (obj == NULL) {
2763 				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2764 				i915.semaphores = 0;
2765 			} else {
2766 				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2767 				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2768 				if (ret != 0) {
2769 					drm_gem_object_unreference(&obj->base);
2770 					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2771 					i915.semaphores = 0;
2772 				} else
2773 					dev_priv->semaphore_obj = obj;
2774 			}
2775 		}
2776 
2777 		ring->init_context = intel_rcs_ctx_init;
2778 		ring->add_request = gen6_add_request;
2779 		ring->flush = gen8_render_ring_flush;
2780 		ring->irq_get = gen8_ring_get_irq;
2781 		ring->irq_put = gen8_ring_put_irq;
2782 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2783 		ring->get_seqno = gen6_ring_get_seqno;
2784 		ring->set_seqno = ring_set_seqno;
2785 		if (i915_semaphore_is_enabled(dev)) {
2786 			WARN_ON(!dev_priv->semaphore_obj);
2787 			ring->semaphore.sync_to = gen8_ring_sync;
2788 			ring->semaphore.signal = gen8_rcs_signal;
2789 			GEN8_RING_SEMAPHORE_INIT;
2790 		}
2791 	} else if (INTEL_INFO(dev)->gen >= 6) {
2792 		ring->init_context = intel_rcs_ctx_init;
2793 		ring->add_request = gen6_add_request;
2794 		ring->flush = gen7_render_ring_flush;
2795 		if (INTEL_INFO(dev)->gen == 6)
2796 			ring->flush = gen6_render_ring_flush;
2797 		ring->irq_get = gen6_ring_get_irq;
2798 		ring->irq_put = gen6_ring_put_irq;
2799 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2800 		ring->get_seqno = gen6_ring_get_seqno;
2801 		ring->set_seqno = ring_set_seqno;
2802 		if (i915_semaphore_is_enabled(dev)) {
2803 			ring->semaphore.sync_to = gen6_ring_sync;
2804 			ring->semaphore.signal = gen6_signal;
2805 			/*
2806 			 * The current semaphore is only applied on pre-gen8
2807 			 * platform.  And there is no VCS2 ring on the pre-gen8
2808 			 * platform. So the semaphore between RCS and VCS2 is
2809 			 * initialized as INVALID.  Gen8 will initialize the
2810 			 * sema between VCS2 and RCS later.
2811 			 */
2812 			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2813 			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2814 			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2815 			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2816 			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2817 			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2818 			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2819 			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2820 			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2821 			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2822 		}
2823 	} else if (IS_GEN5(dev)) {
2824 		ring->add_request = pc_render_add_request;
2825 		ring->flush = gen4_render_ring_flush;
2826 		ring->get_seqno = pc_render_get_seqno;
2827 		ring->set_seqno = pc_render_set_seqno;
2828 		ring->irq_get = gen5_ring_get_irq;
2829 		ring->irq_put = gen5_ring_put_irq;
2830 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2831 					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
2832 	} else {
2833 		ring->add_request = i9xx_add_request;
2834 		if (INTEL_INFO(dev)->gen < 4)
2835 			ring->flush = gen2_render_ring_flush;
2836 		else
2837 			ring->flush = gen4_render_ring_flush;
2838 		ring->get_seqno = ring_get_seqno;
2839 		ring->set_seqno = ring_set_seqno;
2840 		if (IS_GEN2(dev)) {
2841 			ring->irq_get = i8xx_ring_get_irq;
2842 			ring->irq_put = i8xx_ring_put_irq;
2843 		} else {
2844 			ring->irq_get = i9xx_ring_get_irq;
2845 			ring->irq_put = i9xx_ring_put_irq;
2846 		}
2847 		ring->irq_enable_mask = I915_USER_INTERRUPT;
2848 	}
2849 	ring->write_tail = ring_write_tail;
2850 
2851 	if (IS_HASWELL(dev))
2852 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
2853 	else if (IS_GEN8(dev))
2854 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2855 	else if (INTEL_INFO(dev)->gen >= 6)
2856 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2857 	else if (INTEL_INFO(dev)->gen >= 4)
2858 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2859 	else if (IS_I830(dev) || IS_845G(dev))
2860 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2861 	else
2862 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2863 	ring->init_hw = init_render_ring;
2864 	ring->cleanup = render_ring_cleanup;
2865 
2866 	/* Workaround batchbuffer to combat CS tlb bug. */
2867 	if (HAS_BROKEN_CS_TLB(dev)) {
2868 		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
2869 		if (obj == NULL) {
2870 			DRM_ERROR("Failed to allocate batch bo\n");
2871 			return -ENOMEM;
2872 		}
2873 
2874 		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
2875 		if (ret != 0) {
2876 			drm_gem_object_unreference(&obj->base);
2877 			DRM_ERROR("Failed to ping batch bo\n");
2878 			return ret;
2879 		}
2880 
2881 		ring->scratch.obj = obj;
2882 		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2883 	}
2884 
2885 	ret = intel_init_ring_buffer(dev, ring);
2886 	if (ret)
2887 		return ret;
2888 
2889 	if (INTEL_INFO(dev)->gen >= 5) {
2890 		ret = intel_init_pipe_control(ring);
2891 		if (ret)
2892 			return ret;
2893 	}
2894 
2895 	return 0;
2896 }
2897 
2898 int intel_init_bsd_ring_buffer(struct drm_device *dev)
2899 {
2900 	struct drm_i915_private *dev_priv = dev->dev_private;
2901 	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2902 
2903 	ring->name = "bsd ring";
2904 	ring->id = VCS;
2905 	ring->exec_id = I915_EXEC_BSD;
2906 
2907 	ring->write_tail = ring_write_tail;
2908 	if (INTEL_INFO(dev)->gen >= 6) {
2909 		ring->mmio_base = GEN6_BSD_RING_BASE;
2910 		/* gen6 bsd needs a special wa for tail updates */
2911 		if (IS_GEN6(dev))
2912 			ring->write_tail = gen6_bsd_ring_write_tail;
2913 		ring->flush = gen6_bsd_ring_flush;
2914 		ring->add_request = gen6_add_request;
2915 		ring->get_seqno = gen6_ring_get_seqno;
2916 		ring->set_seqno = ring_set_seqno;
2917 		if (INTEL_INFO(dev)->gen >= 8) {
2918 			ring->irq_enable_mask =
2919 				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2920 			ring->irq_get = gen8_ring_get_irq;
2921 			ring->irq_put = gen8_ring_put_irq;
2922 			ring->dispatch_execbuffer =
2923 				gen8_ring_dispatch_execbuffer;
2924 			if (i915_semaphore_is_enabled(dev)) {
2925 				ring->semaphore.sync_to = gen8_ring_sync;
2926 				ring->semaphore.signal = gen8_xcs_signal;
2927 				GEN8_RING_SEMAPHORE_INIT;
2928 			}
2929 		} else {
2930 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2931 			ring->irq_get = gen6_ring_get_irq;
2932 			ring->irq_put = gen6_ring_put_irq;
2933 			ring->dispatch_execbuffer =
2934 				gen6_ring_dispatch_execbuffer;
2935 			if (i915_semaphore_is_enabled(dev)) {
2936 				ring->semaphore.sync_to = gen6_ring_sync;
2937 				ring->semaphore.signal = gen6_signal;
2938 				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2939 				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2940 				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2941 				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2942 				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2943 				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2944 				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2945 				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2946 				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2947 				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2948 			}
2949 		}
2950 	} else {
2951 		ring->mmio_base = BSD_RING_BASE;
2952 		ring->flush = bsd_ring_flush;
2953 		ring->add_request = i9xx_add_request;
2954 		ring->get_seqno = ring_get_seqno;
2955 		ring->set_seqno = ring_set_seqno;
2956 		if (IS_GEN5(dev)) {
2957 			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2958 			ring->irq_get = gen5_ring_get_irq;
2959 			ring->irq_put = gen5_ring_put_irq;
2960 		} else {
2961 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2962 			ring->irq_get = i9xx_ring_get_irq;
2963 			ring->irq_put = i9xx_ring_put_irq;
2964 		}
2965 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2966 	}
2967 	ring->init_hw = init_ring_common;
2968 
2969 	return intel_init_ring_buffer(dev, ring);
2970 }
2971 
2972 /**
2973  * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
2974  */
2975 int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2976 {
2977 	struct drm_i915_private *dev_priv = dev->dev_private;
2978 	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2979 
2980 	ring->name = "bsd2 ring";
2981 	ring->id = VCS2;
2982 	ring->exec_id = I915_EXEC_BSD;
2983 
2984 	ring->write_tail = ring_write_tail;
2985 	ring->mmio_base = GEN8_BSD2_RING_BASE;
2986 	ring->flush = gen6_bsd_ring_flush;
2987 	ring->add_request = gen6_add_request;
2988 	ring->get_seqno = gen6_ring_get_seqno;
2989 	ring->set_seqno = ring_set_seqno;
2990 	ring->irq_enable_mask =
2991 			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2992 	ring->irq_get = gen8_ring_get_irq;
2993 	ring->irq_put = gen8_ring_put_irq;
2994 	ring->dispatch_execbuffer =
2995 			gen8_ring_dispatch_execbuffer;
2996 	if (i915_semaphore_is_enabled(dev)) {
2997 		ring->semaphore.sync_to = gen8_ring_sync;
2998 		ring->semaphore.signal = gen8_xcs_signal;
2999 		GEN8_RING_SEMAPHORE_INIT;
3000 	}
3001 	ring->init_hw = init_ring_common;
3002 
3003 	return intel_init_ring_buffer(dev, ring);
3004 }
3005 
3006 int intel_init_blt_ring_buffer(struct drm_device *dev)
3007 {
3008 	struct drm_i915_private *dev_priv = dev->dev_private;
3009 	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
3010 
3011 	ring->name = "blitter ring";
3012 	ring->id = BCS;
3013 	ring->exec_id = I915_EXEC_BLT;
3014 
3015 	ring->mmio_base = BLT_RING_BASE;
3016 	ring->write_tail = ring_write_tail;
3017 	ring->flush = gen6_ring_flush;
3018 	ring->add_request = gen6_add_request;
3019 	ring->get_seqno = gen6_ring_get_seqno;
3020 	ring->set_seqno = ring_set_seqno;
3021 	if (INTEL_INFO(dev)->gen >= 8) {
3022 		ring->irq_enable_mask =
3023 			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
3024 		ring->irq_get = gen8_ring_get_irq;
3025 		ring->irq_put = gen8_ring_put_irq;
3026 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3027 		if (i915_semaphore_is_enabled(dev)) {
3028 			ring->semaphore.sync_to = gen8_ring_sync;
3029 			ring->semaphore.signal = gen8_xcs_signal;
3030 			GEN8_RING_SEMAPHORE_INIT;
3031 		}
3032 	} else {
3033 		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
3034 		ring->irq_get = gen6_ring_get_irq;
3035 		ring->irq_put = gen6_ring_put_irq;
3036 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
3037 		if (i915_semaphore_is_enabled(dev)) {
3038 			ring->semaphore.signal = gen6_signal;
3039 			ring->semaphore.sync_to = gen6_ring_sync;
3040 			/*
3041 			 * The current semaphore is only applied on pre-gen8
3042 			 * platform.  And there is no VCS2 ring on the pre-gen8
3043 			 * platform. So the semaphore between BCS and VCS2 is
3044 			 * initialized as INVALID.  Gen8 will initialize the
3045 			 * sema between BCS and VCS2 later.
3046 			 */
3047 			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
3048 			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
3049 			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
3050 			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
3051 			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3052 			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
3053 			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
3054 			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
3055 			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
3056 			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3057 		}
3058 	}
3059 	ring->init_hw = init_ring_common;
3060 
3061 	return intel_init_ring_buffer(dev, ring);
3062 }
3063 
3064 int intel_init_vebox_ring_buffer(struct drm_device *dev)
3065 {
3066 	struct drm_i915_private *dev_priv = dev->dev_private;
3067 	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
3068 
3069 	ring->name = "video enhancement ring";
3070 	ring->id = VECS;
3071 	ring->exec_id = I915_EXEC_VEBOX;
3072 
3073 	ring->mmio_base = VEBOX_RING_BASE;
3074 	ring->write_tail = ring_write_tail;
3075 	ring->flush = gen6_ring_flush;
3076 	ring->add_request = gen6_add_request;
3077 	ring->get_seqno = gen6_ring_get_seqno;
3078 	ring->set_seqno = ring_set_seqno;
3079 
3080 	if (INTEL_INFO(dev)->gen >= 8) {
3081 		ring->irq_enable_mask =
3082 			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
3083 		ring->irq_get = gen8_ring_get_irq;
3084 		ring->irq_put = gen8_ring_put_irq;
3085 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3086 		if (i915_semaphore_is_enabled(dev)) {
3087 			ring->semaphore.sync_to = gen8_ring_sync;
3088 			ring->semaphore.signal = gen8_xcs_signal;
3089 			GEN8_RING_SEMAPHORE_INIT;
3090 		}
3091 	} else {
3092 		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
3093 		ring->irq_get = hsw_vebox_get_irq;
3094 		ring->irq_put = hsw_vebox_put_irq;
3095 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
3096 		if (i915_semaphore_is_enabled(dev)) {
3097 			ring->semaphore.sync_to = gen6_ring_sync;
3098 			ring->semaphore.signal = gen6_signal;
3099 			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
3100 			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
3101 			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
3102 			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
3103 			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3104 			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
3105 			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
3106 			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
3107 			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
3108 			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3109 		}
3110 	}
3111 	ring->init_hw = init_ring_common;
3112 
3113 	return intel_init_ring_buffer(dev, ring);
3114 }
3115 
3116 int
3117 intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
3118 {
3119 	struct intel_engine_cs *ring = req->ring;
3120 	int ret;
3121 
3122 	if (!ring->gpu_caches_dirty)
3123 		return 0;
3124 
3125 	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
3126 	if (ret)
3127 		return ret;
3128 
3129 	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
3130 
3131 	ring->gpu_caches_dirty = false;
3132 	return 0;
3133 }
3134 
3135 int
3136 intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
3137 {
3138 	struct intel_engine_cs *ring = req->ring;
3139 	uint32_t flush_domains;
3140 	int ret;
3141 
3142 	flush_domains = 0;
3143 	if (ring->gpu_caches_dirty)
3144 		flush_domains = I915_GEM_GPU_DOMAINS;
3145 
3146 	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3147 	if (ret)
3148 		return ret;
3149 
3150 	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3151 
3152 	ring->gpu_caches_dirty = false;
3153 	return 0;
3154 }
3155 
3156 void
3157 intel_stop_ring_buffer(struct intel_engine_cs *ring)
3158 {
3159 	int ret;
3160 
3161 	if (!intel_ring_initialized(ring))
3162 		return;
3163 
3164 	ret = intel_ring_idle(ring);
3165 	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
3166 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3167 			  ring->name, ret);
3168 
3169 	stop_ring(ring);
3170 }
3171