xref: /dflybsd-src/sys/dev/drm/i915/intel_ringbuffer.c (revision 31c068aaf635ad9fa72dbc4c65b32d890ff7544d)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <drm/drmP.h>
31 #include "i915_drv.h"
32 #include <drm/i915_drm.h>
33 #include "i915_trace.h"
34 #include "intel_drv.h"
35 
36 /*
37  * 965+ support PIPE_CONTROL commands, which provide finer grained control
38  * over cache flushing.
39  */
40 struct pipe_control {
41 	struct drm_i915_gem_object *obj;
42 	volatile u32 *cpu_page;
43 	u32 gtt_offset;
44 };
45 
46 static inline int ring_space(struct intel_ring_buffer *ring)
47 {
48 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
49 	if (space < 0)
50 		space += ring->size;
51 	return space;
52 }
53 
54 static int
55 gen2_render_ring_flush(struct intel_ring_buffer *ring,
56 		       u32	invalidate_domains,
57 		       u32	flush_domains)
58 {
59 	u32 cmd;
60 	int ret;
61 
62 	cmd = MI_FLUSH;
63 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
64 		cmd |= MI_NO_WRITE_FLUSH;
65 
66 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
67 		cmd |= MI_READ_FLUSH;
68 
69 	ret = intel_ring_begin(ring, 2);
70 	if (ret)
71 		return ret;
72 
73 	intel_ring_emit(ring, cmd);
74 	intel_ring_emit(ring, MI_NOOP);
75 	intel_ring_advance(ring);
76 
77 	return 0;
78 }
79 
80 static int
81 gen4_render_ring_flush(struct intel_ring_buffer *ring,
82 		       u32	invalidate_domains,
83 		       u32	flush_domains)
84 {
85 	struct drm_device *dev = ring->dev;
86 	u32 cmd;
87 	int ret;
88 
89 	/*
90 	 * read/write caches:
91 	 *
92 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
93 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
94 	 * also flushed at 2d versus 3d pipeline switches.
95 	 *
96 	 * read-only caches:
97 	 *
98 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
99 	 * MI_READ_FLUSH is set, and is always flushed on 965.
100 	 *
101 	 * I915_GEM_DOMAIN_COMMAND may not exist?
102 	 *
103 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
104 	 * invalidated when MI_EXE_FLUSH is set.
105 	 *
106 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
107 	 * invalidated with every MI_FLUSH.
108 	 *
109 	 * TLBs:
110 	 *
111 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
112 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
113 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
114 	 * are flushed at any MI_FLUSH.
115 	 */
116 
117 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
118 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
119 		cmd &= ~MI_NO_WRITE_FLUSH;
120 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
121 		cmd |= MI_EXE_FLUSH;
122 
123 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
124 	    (IS_G4X(dev) || IS_GEN5(dev)))
125 		cmd |= MI_INVALIDATE_ISP;
126 
127 	ret = intel_ring_begin(ring, 2);
128 	if (ret)
129 		return ret;
130 
131 	intel_ring_emit(ring, cmd);
132 	intel_ring_emit(ring, MI_NOOP);
133 	intel_ring_advance(ring);
134 
135 	return 0;
136 }
137 
138 /**
139  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
140  * implementing two workarounds on gen6.  From section 1.4.7.1
141  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
142  *
143  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
144  * produced by non-pipelined state commands), software needs to first
145  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
146  * 0.
147  *
148  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
149  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
150  *
151  * And the workaround for these two requires this workaround first:
152  *
153  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
154  * BEFORE the pipe-control with a post-sync op and no write-cache
155  * flushes.
156  *
157  * And this last workaround is tricky because of the requirements on
158  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
159  * volume 2 part 1:
160  *
161  *     "1 of the following must also be set:
162  *      - Render Target Cache Flush Enable ([12] of DW1)
163  *      - Depth Cache Flush Enable ([0] of DW1)
164  *      - Stall at Pixel Scoreboard ([1] of DW1)
165  *      - Depth Stall ([13] of DW1)
166  *      - Post-Sync Operation ([13] of DW1)
167  *      - Notify Enable ([8] of DW1)"
168  *
169  * The cache flushes require the workaround flush that triggered this
170  * one, so we can't use it.  Depth stall would trigger the same.
171  * Post-sync nonzero is what triggered this second workaround, so we
172  * can't use that one either.  Notify enable is IRQs, which aren't
173  * really our business.  That leaves only stall at scoreboard.
174  */
175 static int
176 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
177 {
178 	struct pipe_control *pc = ring->private;
179 	u32 scratch_addr = pc->gtt_offset + 128;
180 	int ret;
181 
182 
183 	ret = intel_ring_begin(ring, 6);
184 	if (ret)
185 		return ret;
186 
187 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
188 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
189 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
190 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
191 	intel_ring_emit(ring, 0); /* low dword */
192 	intel_ring_emit(ring, 0); /* high dword */
193 	intel_ring_emit(ring, MI_NOOP);
194 	intel_ring_advance(ring);
195 
196 	ret = intel_ring_begin(ring, 6);
197 	if (ret)
198 		return ret;
199 
200 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
201 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
202 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
203 	intel_ring_emit(ring, 0);
204 	intel_ring_emit(ring, 0);
205 	intel_ring_emit(ring, MI_NOOP);
206 	intel_ring_advance(ring);
207 
208 	return 0;
209 }
210 
211 static int
212 gen6_render_ring_flush(struct intel_ring_buffer *ring,
213                          u32 invalidate_domains, u32 flush_domains)
214 {
215 	u32 flags = 0;
216 	struct pipe_control *pc = ring->private;
217 	u32 scratch_addr = pc->gtt_offset + 128;
218 	int ret;
219 
220 	/* Force SNB workarounds for PIPE_CONTROL flushes */
221 	ret = intel_emit_post_sync_nonzero_flush(ring);
222 	if (ret)
223 		return ret;
224 
225 	/* Just flush everything.  Experiments have shown that reducing the
226 	 * number of bits based on the write domains has little performance
227 	 * impact.
228 	 */
229 	if (flush_domains) {
230 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
231 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232 		/*
233 		 * Ensure that any following seqno writes only happen
234 		 * when the render cache is indeed flushed.
235 		 */
236 		flags |= PIPE_CONTROL_CS_STALL;
237 	}
238 	if (invalidate_domains) {
239 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
240 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
241 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
242 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
243 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
244 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
245 		/*
246 		 * TLB invalidate requires a post-sync write.
247 		 */
248 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
249 	}
250 
251 	ret = intel_ring_begin(ring, 4);
252 	if (ret)
253 		return ret;
254 
255 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
256 	intel_ring_emit(ring, flags);
257 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
258 	intel_ring_emit(ring, 0);
259 	intel_ring_advance(ring);
260 
261 	return 0;
262 }
263 
264 static int
265 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
266 {
267 	int ret;
268 
269 	ret = intel_ring_begin(ring, 4);
270 	if (ret)
271 		return ret;
272 
273 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
274 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
275 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
276 	intel_ring_emit(ring, 0);
277 	intel_ring_emit(ring, 0);
278 	intel_ring_advance(ring);
279 
280 	return 0;
281 }
282 
283 static int
284 gen7_render_ring_flush(struct intel_ring_buffer *ring,
285 		       u32 invalidate_domains, u32 flush_domains)
286 {
287 	u32 flags = 0;
288 	struct pipe_control *pc = ring->private;
289 	u32 scratch_addr = pc->gtt_offset + 128;
290 	int ret;
291 
292 	/*
293 	 * Ensure that any following seqno writes only happen when the render
294 	 * cache is indeed flushed.
295 	 *
296 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
297 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
298 	 * don't try to be clever and just set it unconditionally.
299 	 */
300 	flags |= PIPE_CONTROL_CS_STALL;
301 
302 	/* Just flush everything.  Experiments have shown that reducing the
303 	 * number of bits based on the write domains has little performance
304 	 * impact.
305 	 */
306 	if (flush_domains) {
307 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
308 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
309 	}
310 	if (invalidate_domains) {
311 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
312 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
313 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
314 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
315 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
316 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
317 		/*
318 		 * TLB invalidate requires a post-sync write.
319 		 */
320 		flags |= PIPE_CONTROL_QW_WRITE;
321 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
322 
323 		/* Workaround: we must issue a pipe_control with CS-stall bit
324 		 * set before a pipe_control command that has the state cache
325 		 * invalidate bit set. */
326 		gen7_render_ring_cs_stall_wa(ring);
327 	}
328 
329 	ret = intel_ring_begin(ring, 4);
330 	if (ret)
331 		return ret;
332 
333 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
334 	intel_ring_emit(ring, flags);
335 	intel_ring_emit(ring, scratch_addr);
336 	intel_ring_emit(ring, 0);
337 	intel_ring_advance(ring);
338 
339 	return 0;
340 }
341 
342 static void ring_write_tail(struct intel_ring_buffer *ring,
343 			    u32 value)
344 {
345 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
346 	I915_WRITE_TAIL(ring, value);
347 }
348 
349 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
350 {
351 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
352 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
353 			RING_ACTHD(ring->mmio_base) : ACTHD;
354 
355 	return I915_READ(acthd_reg);
356 }
357 
358 static int init_ring_common(struct intel_ring_buffer *ring)
359 {
360 	struct drm_device *dev = ring->dev;
361 	drm_i915_private_t *dev_priv = dev->dev_private;
362 	struct drm_i915_gem_object *obj = ring->obj;
363 	int ret = 0;
364 	u32 head;
365 
366 	if (HAS_FORCE_WAKE(dev))
367 		gen6_gt_force_wake_get(dev_priv);
368 
369 	/* Stop the ring if it's running. */
370 	I915_WRITE_CTL(ring, 0);
371 	I915_WRITE_HEAD(ring, 0);
372 	ring->write_tail(ring, 0);
373 
374 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
375 
376 	/* G45 ring initialization fails to reset head to zero */
377 	if (head != 0) {
378 		DRM_DEBUG_KMS("%s head not reset to zero "
379 			      "ctl %08x head %08x tail %08x start %08x\n",
380 			      ring->name,
381 			      I915_READ_CTL(ring),
382 			      I915_READ_HEAD(ring),
383 			      I915_READ_TAIL(ring),
384 			      I915_READ_START(ring));
385 
386 		I915_WRITE_HEAD(ring, 0);
387 
388 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
389 			DRM_ERROR("failed to set %s head to zero "
390 				  "ctl %08x head %08x tail %08x start %08x\n",
391 				  ring->name,
392 				  I915_READ_CTL(ring),
393 				  I915_READ_HEAD(ring),
394 				  I915_READ_TAIL(ring),
395 				  I915_READ_START(ring));
396 		}
397 	}
398 
399 	/* Initialize the ring. This must happen _after_ we've cleared the ring
400 	 * registers with the above sequence (the readback of the HEAD registers
401 	 * also enforces ordering), otherwise the hw might lose the new ring
402 	 * register values. */
403 	I915_WRITE_START(ring, obj->gtt_offset);
404 	I915_WRITE_CTL(ring,
405 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
406 			| RING_VALID);
407 
408 	/* If the head is still not zero, the ring is dead */
409 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
410 		     I915_READ_START(ring) == obj->gtt_offset &&
411 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
412 		DRM_ERROR("%s initialization failed "
413 				"ctl %08x head %08x tail %08x start %08x\n",
414 				ring->name,
415 				I915_READ_CTL(ring),
416 				I915_READ_HEAD(ring),
417 				I915_READ_TAIL(ring),
418 				I915_READ_START(ring));
419 		ret = -EIO;
420 		goto out;
421 	}
422 
423 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
424 		i915_kernel_lost_context(ring->dev);
425 	else {
426 		ring->head = I915_READ_HEAD(ring);
427 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
428 		ring->space = ring_space(ring);
429 		ring->last_retired_head = -1;
430 	}
431 
432 out:
433 	if (HAS_FORCE_WAKE(dev))
434 		gen6_gt_force_wake_put(dev_priv);
435 
436 	return ret;
437 }
438 
439 static int
440 init_pipe_control(struct intel_ring_buffer *ring)
441 {
442 	struct pipe_control *pc;
443 	struct drm_i915_gem_object *obj;
444 	int ret;
445 
446 	if (ring->private)
447 		return 0;
448 
449 	pc = kmalloc(sizeof(*pc), M_DRM, M_WAITOK);
450 	if (!pc)
451 		return -ENOMEM;
452 
453 	obj = i915_gem_alloc_object(ring->dev, 4096);
454 	if (obj == NULL) {
455 		DRM_ERROR("Failed to allocate seqno page\n");
456 		ret = -ENOMEM;
457 		goto err;
458 	}
459 
460 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
461 
462 	ret = i915_gem_object_pin(obj, 4096, true, false);
463 	if (ret)
464 		goto err_unref;
465 
466 	pc->gtt_offset = obj->gtt_offset;
467 	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(&kernel_map, PAGE_SIZE, PAGE_SIZE);
468 	if (pc->cpu_page == NULL)
469 		goto err_unpin;
470 
471 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
472 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
473 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
474 	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
475 			 ring->name, pc->gtt_offset);
476 
477 	pc->obj = obj;
478 	ring->private = pc;
479 	return 0;
480 
481 err_unpin:
482 	i915_gem_object_unpin(obj);
483 err_unref:
484 	drm_gem_object_unreference(&obj->base);
485 err:
486 	kfree(pc, M_DRM);
487 	return ret;
488 }
489 
490 static void
491 cleanup_pipe_control(struct intel_ring_buffer *ring)
492 {
493 	struct pipe_control *pc = ring->private;
494 	struct drm_i915_gem_object *obj;
495 
496 	if (!ring->private)
497 		return;
498 
499 	obj = pc->obj;
500 
501 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
502 	kmem_free(&kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
503 	i915_gem_object_unpin(obj);
504 	drm_gem_object_unreference(&obj->base);
505 
506 	kfree(pc, M_DRM);
507 	ring->private = NULL;
508 }
509 
510 static int init_render_ring(struct intel_ring_buffer *ring)
511 {
512 	struct drm_device *dev = ring->dev;
513 	struct drm_i915_private *dev_priv = dev->dev_private;
514 	int ret = init_ring_common(ring);
515 
516 	if (INTEL_INFO(dev)->gen > 3)
517 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
518 
519 	/* We need to disable the AsyncFlip performance optimisations in order
520 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
521 	 * programmed to '1' on all products.
522 	 */
523 	if (INTEL_INFO(dev)->gen >= 6)
524 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
525 
526 	/* Required for the hardware to program scanline values for waiting */
527 	if (INTEL_INFO(dev)->gen == 6)
528 		I915_WRITE(GFX_MODE,
529 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
530 
531 	if (IS_GEN7(dev))
532 		I915_WRITE(GFX_MODE_GEN7,
533 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
534 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
535 
536 	if (INTEL_INFO(dev)->gen >= 5) {
537 		ret = init_pipe_control(ring);
538 		if (ret)
539 			return ret;
540 	}
541 
542 	if (IS_GEN6(dev)) {
543 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
544 		 * "If this bit is set, STCunit will have LRA as replacement
545 		 *  policy. [...] This bit must be reset.  LRA replacement
546 		 *  policy is not supported."
547 		 */
548 		I915_WRITE(CACHE_MODE_0,
549 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
550 
551 		/* This is not explicitly set for GEN6, so read the register.
552 		 * see intel_ring_mi_set_context() for why we care.
553 		 * TODO: consider explicitly setting the bit for GEN5
554 		 */
555 		ring->itlb_before_ctx_switch =
556 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
557 	}
558 
559 	if (INTEL_INFO(dev)->gen >= 6)
560 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
561 
562 	if (HAS_L3_GPU_CACHE(dev))
563 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
564 
565 	return ret;
566 }
567 
568 static void render_ring_cleanup(struct intel_ring_buffer *ring)
569 {
570 	struct drm_device *dev = ring->dev;
571 
572 	if (!ring->private)
573 		return;
574 
575 	if (HAS_BROKEN_CS_TLB(dev))
576 		drm_gem_object_unreference(to_gem_object(ring->private));
577 
578 	cleanup_pipe_control(ring);
579 }
580 
581 static void
582 update_mboxes(struct intel_ring_buffer *ring,
583 	      u32 mmio_offset)
584 {
585 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
586 	intel_ring_emit(ring, mmio_offset);
587 	intel_ring_emit(ring, ring->outstanding_lazy_request);
588 }
589 
590 /**
591  * gen6_add_request - Update the semaphore mailbox registers
592  *
593  * @ring - ring that is adding a request
594  * @seqno - return seqno stuck into the ring
595  *
596  * Update the mailbox registers in the *other* rings with the current seqno.
597  * This acts like a signal in the canonical semaphore.
598  */
599 static int
600 gen6_add_request(struct intel_ring_buffer *ring)
601 {
602 	u32 mbox1_reg;
603 	u32 mbox2_reg;
604 	int ret;
605 
606 	ret = intel_ring_begin(ring, 10);
607 	if (ret)
608 		return ret;
609 
610 	mbox1_reg = ring->signal_mbox[0];
611 	mbox2_reg = ring->signal_mbox[1];
612 
613 	update_mboxes(ring, mbox1_reg);
614 	update_mboxes(ring, mbox2_reg);
615 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
616 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
617 	intel_ring_emit(ring, ring->outstanding_lazy_request);
618 	intel_ring_emit(ring, MI_USER_INTERRUPT);
619 	intel_ring_advance(ring);
620 
621 	return 0;
622 }
623 
624 static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
625 					      u32 seqno)
626 {
627 	struct drm_i915_private *dev_priv = dev->dev_private;
628 	return dev_priv->last_seqno < seqno;
629 }
630 
631 /**
632  * intel_ring_sync - sync the waiter to the signaller on seqno
633  *
634  * @waiter - ring that is waiting
635  * @signaller - ring which has, or will signal
636  * @seqno - seqno which the waiter will block on
637  */
638 static int
639 gen6_ring_sync(struct intel_ring_buffer *waiter,
640 	       struct intel_ring_buffer *signaller,
641 	       u32 seqno)
642 {
643 	int ret;
644 	u32 dw1 = MI_SEMAPHORE_MBOX |
645 		  MI_SEMAPHORE_COMPARE |
646 		  MI_SEMAPHORE_REGISTER;
647 
648 	/* Throughout all of the GEM code, seqno passed implies our current
649 	 * seqno is >= the last seqno executed. However for hardware the
650 	 * comparison is strictly greater than.
651 	 */
652 	seqno -= 1;
653 
654 	WARN_ON(signaller->semaphore_register[waiter->id] ==
655 		MI_SEMAPHORE_SYNC_INVALID);
656 
657 	ret = intel_ring_begin(waiter, 4);
658 	if (ret)
659 		return ret;
660 
661 	/* If seqno wrap happened, omit the wait with no-ops */
662 	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
663 		intel_ring_emit(waiter,
664 				dw1 |
665 				signaller->semaphore_register[waiter->id]);
666 		intel_ring_emit(waiter, seqno);
667 		intel_ring_emit(waiter, 0);
668 		intel_ring_emit(waiter, MI_NOOP);
669 	} else {
670 		intel_ring_emit(waiter, MI_NOOP);
671 		intel_ring_emit(waiter, MI_NOOP);
672 		intel_ring_emit(waiter, MI_NOOP);
673 		intel_ring_emit(waiter, MI_NOOP);
674 	}
675 	intel_ring_advance(waiter);
676 
677 	return 0;
678 }
679 
/*
 * Emit one four-dword PIPE_CONTROL (qword write + depth stall) whose
 * address is addr__ in the global GTT, followed by two zero dwords.
 * The macro does not reserve ring space itself.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) |		\
		 PIPE_CONTROL_QW_WRITE |				\
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
688 
689 static int
690 pc_render_add_request(struct intel_ring_buffer *ring)
691 {
692 	struct pipe_control *pc = ring->private;
693 	u32 scratch_addr = pc->gtt_offset + 128;
694 	int ret;
695 
696 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
697 	 * incoherent with writes to memory, i.e. completely fubar,
698 	 * so we need to use PIPE_NOTIFY instead.
699 	 *
700 	 * However, we also need to workaround the qword write
701 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
702 	 * memory before requesting an interrupt.
703 	 */
704 	ret = intel_ring_begin(ring, 32);
705 	if (ret)
706 		return ret;
707 
708 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
709 			PIPE_CONTROL_WRITE_FLUSH |
710 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
711 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
712 	intel_ring_emit(ring, ring->outstanding_lazy_request);
713 	intel_ring_emit(ring, 0);
714 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
715 	scratch_addr += 128; /* write to separate cachelines */
716 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
717 	scratch_addr += 128;
718 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
719 	scratch_addr += 128;
720 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
721 	scratch_addr += 128;
722 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
723 	scratch_addr += 128;
724 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
725 
726 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
727 			PIPE_CONTROL_WRITE_FLUSH |
728 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
729 			PIPE_CONTROL_NOTIFY);
730 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
731 	intel_ring_emit(ring, ring->outstanding_lazy_request);
732 	intel_ring_emit(ring, 0);
733 	intel_ring_advance(ring);
734 
735 	return 0;
736 }
737 
738 static u32
739 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
740 {
741 	/* Workaround to force correct ordering between irq and seqno writes on
742 	 * ivb (and maybe also on snb) by reading from a CS register (like
743 	 * ACTHD) before reading the status page. */
744 	if (!lazy_coherency)
745 		intel_ring_get_active_head(ring);
746 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
747 }
748 
749 static u32
750 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
751 {
752 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
753 }
754 
755 static void
756 ring_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
757 {
758 	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
759 }
760 
761 static u32
762 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
763 {
764 	struct pipe_control *pc = ring->private;
765 	return pc->cpu_page[0];
766 }
767 
768 static void
769 pc_render_set_seqno(struct intel_ring_buffer *ring, u32 seqno)
770 {
771 	struct pipe_control *pc = ring->private;
772 	pc->cpu_page[0] = seqno;
773 }
774 
775 static bool
776 gen5_ring_get_irq(struct intel_ring_buffer *ring)
777 {
778 	struct drm_device *dev = ring->dev;
779 	drm_i915_private_t *dev_priv = dev->dev_private;
780 
781 	if (!dev->irq_enabled)
782 		return false;
783 
784 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
785 	if (ring->irq_refcount++ == 0) {
786 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
787 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
788 		POSTING_READ(GTIMR);
789 	}
790 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
791 
792 	return true;
793 }
794 
795 static void
796 gen5_ring_put_irq(struct intel_ring_buffer *ring)
797 {
798 	struct drm_device *dev = ring->dev;
799 	drm_i915_private_t *dev_priv = dev->dev_private;
800 
801 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
802 	if (--ring->irq_refcount == 0) {
803 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
804 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
805 		POSTING_READ(GTIMR);
806 	}
807 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
808 }
809 
810 static bool
811 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
812 {
813 	struct drm_device *dev = ring->dev;
814 	drm_i915_private_t *dev_priv = dev->dev_private;
815 
816 	if (!dev->irq_enabled)
817 		return false;
818 
819 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
820 	if (ring->irq_refcount++ == 0) {
821 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
822 		I915_WRITE(IMR, dev_priv->irq_mask);
823 		POSTING_READ(IMR);
824 	}
825 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
826 
827 	return true;
828 }
829 
830 static void
831 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
832 {
833 	struct drm_device *dev = ring->dev;
834 	drm_i915_private_t *dev_priv = dev->dev_private;
835 
836 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
837 	if (--ring->irq_refcount == 0) {
838 		dev_priv->irq_mask |= ring->irq_enable_mask;
839 		I915_WRITE(IMR, dev_priv->irq_mask);
840 		POSTING_READ(IMR);
841 	}
842 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
843 }
844 
845 static bool
846 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
847 {
848 	struct drm_device *dev = ring->dev;
849 	drm_i915_private_t *dev_priv = dev->dev_private;
850 
851 	if (!dev->irq_enabled)
852 		return false;
853 
854 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
855 	if (ring->irq_refcount++ == 0) {
856 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
857 		I915_WRITE16(IMR, dev_priv->irq_mask);
858 		POSTING_READ16(IMR);
859 	}
860 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
861 
862 	return true;
863 }
864 
865 static void
866 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
867 {
868 	struct drm_device *dev = ring->dev;
869 	drm_i915_private_t *dev_priv = dev->dev_private;
870 
871 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
872 	if (--ring->irq_refcount == 0) {
873 		dev_priv->irq_mask |= ring->irq_enable_mask;
874 		I915_WRITE16(IMR, dev_priv->irq_mask);
875 		POSTING_READ16(IMR);
876 	}
877 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
878 }
879 
880 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
881 {
882 	struct drm_device *dev = ring->dev;
883 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
884 	u32 mmio = 0;
885 
886 	/* The ring status page addresses are no longer next to the rest of
887 	 * the ring registers as of gen7.
888 	 */
889 	if (IS_GEN7(dev)) {
890 		switch (ring->id) {
891 		case RCS:
892 			mmio = RENDER_HWS_PGA_GEN7;
893 			break;
894 		case BCS:
895 			mmio = BLT_HWS_PGA_GEN7;
896 			break;
897 		case VCS:
898 			mmio = BSD_HWS_PGA_GEN7;
899 			break;
900 		}
901 	} else if (IS_GEN6(ring->dev)) {
902 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
903 	} else {
904 		mmio = RING_HWS_PGA(ring->mmio_base);
905 	}
906 
907 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
908 	POSTING_READ(mmio);
909 }
910 
911 static int
912 bsd_ring_flush(struct intel_ring_buffer *ring,
913 	       u32     invalidate_domains,
914 	       u32     flush_domains)
915 {
916 	int ret;
917 
918 	ret = intel_ring_begin(ring, 2);
919 	if (ret)
920 		return ret;
921 
922 	intel_ring_emit(ring, MI_FLUSH);
923 	intel_ring_emit(ring, MI_NOOP);
924 	intel_ring_advance(ring);
925 	return 0;
926 }
927 
928 static int
929 i9xx_add_request(struct intel_ring_buffer *ring)
930 {
931 	int ret;
932 
933 	ret = intel_ring_begin(ring, 4);
934 	if (ret)
935 		return ret;
936 
937 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
938 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
939 	intel_ring_emit(ring, ring->outstanding_lazy_request);
940 	intel_ring_emit(ring, MI_USER_INTERRUPT);
941 	intel_ring_advance(ring);
942 
943 	return 0;
944 }
945 
946 static bool
947 gen6_ring_get_irq(struct intel_ring_buffer *ring)
948 {
949 	struct drm_device *dev = ring->dev;
950 	drm_i915_private_t *dev_priv = dev->dev_private;
951 
952 	if (!dev->irq_enabled)
953 	       return false;
954 
955 	/* It looks like we need to prevent the gt from suspending while waiting
956 	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
957 	 * blt/bsd rings on ivb. */
958 	gen6_gt_force_wake_get(dev_priv);
959 
960 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
961 	if (ring->irq_refcount++ == 0) {
962 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
963 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
964 						GEN6_RENDER_L3_PARITY_ERROR));
965 		else
966 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
967 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
968 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
969 		POSTING_READ(GTIMR);
970 	}
971 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
972 
973 	return true;
974 }
975 
976 static void
977 gen6_ring_put_irq(struct intel_ring_buffer *ring)
978 {
979 	struct drm_device *dev = ring->dev;
980 	drm_i915_private_t *dev_priv = dev->dev_private;
981 
982 	lockmgr(&dev_priv->irq_lock, LK_EXCLUSIVE);
983 	if (--ring->irq_refcount == 0) {
984 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
985 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
986 		else
987 			I915_WRITE_IMR(ring, ~0);
988 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
989 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
990 		POSTING_READ(GTIMR);
991 	}
992 	lockmgr(&dev_priv->irq_lock, LK_RELEASE);
993 
994 	gen6_gt_force_wake_put(dev_priv);
995 }
996 
997 static int
998 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
999 			 u32 offset, u32 length,
1000 			 unsigned flags)
1001 {
1002 	int ret;
1003 
1004 	ret = intel_ring_begin(ring, 2);
1005 	if (ret)
1006 		return ret;
1007 
1008 	intel_ring_emit(ring,
1009 			MI_BATCH_BUFFER_START |
1010 			MI_BATCH_GTT |
1011 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1012 	intel_ring_emit(ring, offset);
1013 	intel_ring_advance(ring);
1014 
1015 	return 0;
1016 }
1017 
1018 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
1019 #define I830_BATCH_LIMIT (256*1024)
1020 static int
1021 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
1022 				u32 offset, u32 len,
1023 				unsigned flags)
1024 {
1025 	int ret;
1026 
1027 	if (flags & I915_DISPATCH_PINNED) {
1028 		ret = intel_ring_begin(ring, 4);
1029 		if (ret)
1030 			return ret;
1031 
1032 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1033 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1034 		intel_ring_emit(ring, offset + len - 8);
1035 		intel_ring_emit(ring, MI_NOOP);
1036 		intel_ring_advance(ring);
1037 	} else {
1038 		struct drm_i915_gem_object *obj = ring->private;
1039 		u32 cs_offset = obj->gtt_offset;
1040 
1041 		if (len > I830_BATCH_LIMIT)
1042 			return -ENOSPC;
1043 
1044 		ret = intel_ring_begin(ring, 9+3);
1045 		if (ret)
1046 			return ret;
1047 		/* Blit the batch (which has now all relocs applied) to the stable batch
1048 		 * scratch bo area (so that the CS never stumbles over its tlb
1049 		 * invalidation bug) ... */
1050 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1051 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1052 				XY_SRC_COPY_BLT_WRITE_RGB);
1053 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1054 		intel_ring_emit(ring, 0);
1055 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1056 		intel_ring_emit(ring, cs_offset);
1057 		intel_ring_emit(ring, 0);
1058 		intel_ring_emit(ring, 4096);
1059 		intel_ring_emit(ring, offset);
1060 		intel_ring_emit(ring, MI_FLUSH);
1061 
1062 		/* ... and execute it. */
1063 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1064 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1065 		intel_ring_emit(ring, cs_offset + len - 8);
1066 		intel_ring_advance(ring);
1067 	}
1068 
1069 	return 0;
1070 }
1071 
1072 static int
1073 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1074 			 u32 offset, u32 len,
1075 			 unsigned flags)
1076 {
1077 	int ret;
1078 
1079 	ret = intel_ring_begin(ring, 2);
1080 	if (ret)
1081 		return ret;
1082 
1083 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1084 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1085 	intel_ring_advance(ring);
1086 
1087 	return 0;
1088 }
1089 
1090 static void cleanup_status_page(struct intel_ring_buffer *ring)
1091 {
1092 	struct drm_i915_gem_object *obj;
1093 
1094 	obj = ring->status_page.obj;
1095 	if (obj == NULL)
1096 		return;
1097 
1098 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
1099 	kmem_free(&kernel_map, (vm_offset_t)ring->status_page.page_addr,
1100 	    PAGE_SIZE);
1101 	i915_gem_object_unpin(obj);
1102 	drm_gem_object_unreference(&obj->base);
1103 	ring->status_page.obj = NULL;
1104 }
1105 
1106 static int init_status_page(struct intel_ring_buffer *ring)
1107 {
1108 	struct drm_device *dev = ring->dev;
1109 	struct drm_i915_gem_object *obj;
1110 	int ret;
1111 
1112 	obj = i915_gem_alloc_object(dev, 4096);
1113 	if (obj == NULL) {
1114 		DRM_ERROR("Failed to allocate status page\n");
1115 		ret = -ENOMEM;
1116 		goto err;
1117 	}
1118 
1119 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1120 
1121 	ret = i915_gem_object_pin(obj, 4096, true, false);
1122 	if (ret != 0) {
1123 		goto err_unref;
1124 	}
1125 
1126 	ring->status_page.gfx_addr = obj->gtt_offset;
1127 	ring->status_page.page_addr = (void *)kmem_alloc_nofault(&kernel_map,
1128 	    PAGE_SIZE, PAGE_SIZE);
1129 	if (ring->status_page.page_addr == NULL) {
1130 		ret = -ENOMEM;
1131 		goto err_unpin;
1132 	}
1133 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0], 1);
1134 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1135 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1136 	ring->status_page.obj = obj;
1137 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1138 
1139 	intel_ring_setup_status_page(ring);
1140 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1141 			ring->name, ring->status_page.gfx_addr);
1142 
1143 	return 0;
1144 
1145 err_unpin:
1146 	i915_gem_object_unpin(obj);
1147 err_unref:
1148 	drm_gem_object_unreference(&obj->base);
1149 err:
1150 	return ret;
1151 }
1152 
1153 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1154 {
1155 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1156 	u32 addr;
1157 
1158 	if (!dev_priv->status_page_dmah) {
1159 		dev_priv->status_page_dmah =
1160 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1161 		if (!dev_priv->status_page_dmah)
1162 			return -ENOMEM;
1163 	}
1164 
1165 	addr = dev_priv->status_page_dmah->busaddr;
1166 	if (INTEL_INFO(ring->dev)->gen >= 4)
1167 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1168 	I915_WRITE(HWS_PGA, addr);
1169 
1170 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1171 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1172 
1173 	return 0;
1174 }
1175 
/*
 * Common ring initialisation shared by the per-engine init functions.
 *
 * Initialises the ring's lists, sync seqnos and wait queue, sets up the
 * status page, allocates and pins the ring buffer object, maps it
 * write-combined through the aperture, and finally invokes the engine's
 * ring->init() hook.  The caller must have filled in the ring's vfuncs
 * (at least ring->init) and ring->id beforehand.
 *
 * Returns 0 on success or a negative errno.  On failure the ring object and
 * a GEM-backed status page are released again; ring->cleanup() is NOT
 * called on this path.
 */
static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;
	int ret;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	ring->size = 32 * PAGE_SIZE;
	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	/* Status page: GEM object where required, physical page otherwise
	 * (the physical variant is only valid for the render ring). */
	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			return ret;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_hws_pga(ring);
		if (ret)
			return ret;
	}

	/* NOTE(review): both attempts below call the same allocator, so the
	 * second is effectively a retry on non-LLC parts; presumably the first
	 * was a different (stolen-memory) allocation upstream -- confirm. */
	obj = NULL;
	if (!HAS_LLC(dev))
		obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL)
		obj = i915_gem_alloc_object(dev, ring->size);
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate ringbuffer\n");
		ret = -ENOMEM;
		goto err_hws;
	}

	ring->obj = obj;

	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
	if (ret)
		goto err_unref;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto err_unpin;

	/* CPU access to the ring goes through a write-combined mapping of
	 * the object's location in the aperture. */
	ring->virtual_start =
		ioremap_wc(dev->agp->base + obj->gtt_offset,
			   ring->size);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("Failed to map ringbuffer.\n");
		ret = -EINVAL;
		goto err_unpin;
	}

	ret = ring->init(ring);
	if (ret)
		goto err_unmap;

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	return 0;

	/* Unwind in reverse order of acquisition. */
err_unmap:
	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(&obj->base);
	ring->obj = NULL;
err_hws:
	/* No-op for the physical status page (status_page.obj is NULL). */
	cleanup_status_page(ring);
	return ret;
}
1256 
1257 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1258 {
1259 	struct drm_i915_private *dev_priv;
1260 	int ret;
1261 
1262 	if (ring->obj == NULL)
1263 		return;
1264 
1265 	/* Disable the ring buffer. The ring must be idle at this point */
1266 	dev_priv = ring->dev->dev_private;
1267 	ret = intel_ring_idle(ring);
1268 	if (ret)
1269 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1270 			  ring->name, ret);
1271 
1272 	I915_WRITE_CTL(ring, 0);
1273 
1274 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1275 
1276 	i915_gem_object_unpin(ring->obj);
1277 	drm_gem_object_unreference(&ring->obj->base);
1278 	ring->obj = NULL;
1279 
1280 	if (ring->cleanup)
1281 		ring->cleanup(ring);
1282 
1283 	cleanup_status_page(ring);
1284 }
1285 
1286 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1287 {
1288 	int ret;
1289 
1290 	ret = i915_wait_seqno(ring, seqno);
1291 	if (!ret)
1292 		i915_gem_retire_requests_ring(ring);
1293 
1294 	return ret;
1295 }
1296 
/*
 * Try to obtain at least @n bytes of ring space by waiting for an already
 * queued request to retire.
 *
 * Returns 0 once ring->space >= n, -ENOSPC if no queued request would free
 * enough space (or a post-wait sanity check trips), or the error returned
 * by intel_ring_wait_seqno().
 */
static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
{
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	i915_gem_retire_requests_ring(ring);

	/* A head position recorded by request retirement may already give
	 * us enough room; -1 marks "nothing recorded". */
	if (ring->last_retired_head != -1) {
		ring->head = ring->last_retired_head;
		ring->last_retired_head = -1;
		ring->space = ring_space(ring);
		if (ring->space >= n)
			return 0;
	}

	/* Find the first request whose completion would leave >= n bytes. */
	list_for_each_entry(request, &ring->request_list, list) {
		int space;

		/* Tail already consumed by an earlier pass of this loop. */
		if (request->tail == -1)
			continue;

		/* Free space if the head were at this request's tail, keeping
		 * I915_RING_FREE_SPACE in reserve and wrapping at ring->size. */
		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
		if (space < 0)
			space += ring->size;
		if (space >= n) {
			seqno = request->seqno;
			break;
		}

		/* Consume this request in case we need more space than
		 * is available and so need to prevent a race between
		 * updating last_retired_head and direct reads of
		 * I915_RING_HEAD. It also provides a nice sanity check.
		 */
		request->tail = -1;
	}

	/* seqno 0 doubles as "no suitable request found". */
	if (seqno == 0)
		return -ENOSPC;

	ret = intel_ring_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	/* Retiring the awaited request is expected to have recorded a head. */
	if (WARN_ON(ring->last_retired_head == -1))
		return -ENOSPC;

	ring->head = ring->last_retired_head;
	ring->last_retired_head = -1;
	ring->space = ring_space(ring);
	if (WARN_ON(ring->space < n))
		return -ENOSPC;

	return 0;
}
1353 
1354 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1355 {
1356 	struct drm_device *dev = ring->dev;
1357 	struct drm_i915_private *dev_priv = dev->dev_private;
1358 	unsigned long end;
1359 	int ret;
1360 
1361 	ret = intel_ring_wait_request(ring, n);
1362 	if (ret != -ENOSPC)
1363 		return ret;
1364 
1365 	trace_i915_ring_wait_begin(ring);
1366 	/* With GEM the hangcheck timer should kick us out of the loop,
1367 	 * leaving it early runs the risk of corrupting GEM state (due
1368 	 * to running on almost untested codepaths). But on resume
1369 	 * timers don't work yet, so prevent a complete hang in that
1370 	 * case by choosing an insanely large timeout. */
1371 	end = jiffies + 60 * HZ;
1372 
1373 	do {
1374 		ring->head = I915_READ_HEAD(ring);
1375 		ring->space = ring_space(ring);
1376 		if (ring->space >= n) {
1377 			trace_i915_ring_wait_end(ring);
1378 			return 0;
1379 		}
1380 
1381 #if 0
1382 		if (dev->primary->master) {
1383 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1384 			if (master_priv->sarea_priv)
1385 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1386 		}
1387 #else
1388 		if (dev_priv->sarea_priv)
1389 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1390 #endif
1391 
1392 		msleep(1);
1393 
1394 		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1395 					   dev_priv->mm.interruptible);
1396 		if (ret)
1397 			return ret;
1398 	} while (!time_after(jiffies, end));
1399 	trace_i915_ring_wait_end(ring);
1400 	return -EBUSY;
1401 }
1402 
1403 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1404 {
1405 	uint32_t __iomem *virt;
1406 	int rem = ring->size - ring->tail;
1407 
1408 	if (ring->space < rem) {
1409 		int ret = ring_wait_for_space(ring, rem);
1410 		if (ret)
1411 			return ret;
1412 	}
1413 
1414 	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1415 	rem /= 4;
1416 	while (rem--)
1417 		iowrite32(MI_NOOP, virt++);
1418 
1419 	ring->tail = 0;
1420 	ring->space = ring_space(ring);
1421 
1422 	return 0;
1423 }
1424 
1425 int intel_ring_idle(struct intel_ring_buffer *ring)
1426 {
1427 	u32 seqno;
1428 	int ret;
1429 
1430 	/* We need to add any requests required to flush the objects and ring */
1431 	if (ring->outstanding_lazy_request) {
1432 		ret = i915_add_request(ring, NULL, NULL);
1433 		if (ret)
1434 			return ret;
1435 	}
1436 
1437 	/* Wait upon the last request to be completed */
1438 	if (list_empty(&ring->request_list))
1439 		return 0;
1440 
1441 	seqno = list_entry(ring->request_list.prev,
1442 			   struct drm_i915_gem_request,
1443 			   list)->seqno;
1444 
1445 	return i915_wait_seqno(ring, seqno);
1446 }
1447 
1448 static int
1449 intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1450 {
1451 	if (ring->outstanding_lazy_request)
1452 		return 0;
1453 
1454 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1455 }
1456 
1457 static int __intel_ring_begin(struct intel_ring_buffer *ring,
1458 			      int bytes)
1459 {
1460 	int ret;
1461 
1462 	if (unlikely(ring->tail + bytes > ring->effective_size)) {
1463 		ret = intel_wrap_ring_buffer(ring);
1464 		if (unlikely(ret))
1465 			return ret;
1466 	}
1467 
1468 	if (unlikely(ring->space < bytes)) {
1469 		ret = ring_wait_for_space(ring, bytes);
1470 		if (unlikely(ret))
1471 			return ret;
1472 	}
1473 
1474 	ring->space -= bytes;
1475 	return 0;
1476 }
1477 
1478 int intel_ring_begin(struct intel_ring_buffer *ring,
1479 		     int num_dwords)
1480 {
1481 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1482 	int ret;
1483 
1484 	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1485 				   dev_priv->mm.interruptible);
1486 	if (ret)
1487 		return ret;
1488 
1489 	/* Preallocate the olr before touching the ring */
1490 	ret = intel_ring_alloc_seqno(ring);
1491 	if (ret)
1492 		return ret;
1493 
1494 	return __intel_ring_begin(ring, num_dwords * sizeof(uint32_t));
1495 }
1496 
1497 void intel_ring_init_seqno(struct intel_ring_buffer *ring, u32 seqno)
1498 {
1499 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1500 
1501 	BUG_ON(ring->outstanding_lazy_request);
1502 
1503 	if (INTEL_INFO(ring->dev)->gen >= 6) {
1504 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
1505 		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
1506 	}
1507 
1508 	ring->set_seqno(ring, seqno);
1509 }
1510 
1511 void intel_ring_advance(struct intel_ring_buffer *ring)
1512 {
1513 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1514 
1515 	ring->tail &= ring->size - 1;
1516 	if (dev_priv->gpu_error.stop_rings & intel_ring_flag(ring))
1517 		return;
1518 	ring->write_tail(ring, ring->tail);
1519 }
1520 
1521 
/*
 * write_tail hook for the gen6 BSD ring.
 *
 * Instead of writing the tail register directly, this disables the ring's
 * idle messages, clears GEN6_BSD_RNCID, polls (up to the 50 passed to
 * wait_for(), presumably milliseconds) for the sleep indicator to clear,
 * writes @value to the tail register, and then re-enables idle messages.
 * A timeout is only logged; the tail is written regardless.
 */
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

       /* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	/* Read back so the tail write is posted before idle messages resume. */
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}
1554 
1555 static int gen6_ring_flush(struct intel_ring_buffer *ring,
1556 			   u32 invalidate, u32 flush)
1557 {
1558 	uint32_t cmd;
1559 	int ret;
1560 
1561 	ret = intel_ring_begin(ring, 4);
1562 	if (ret)
1563 		return ret;
1564 
1565 	cmd = MI_FLUSH_DW;
1566 	/*
1567 	 * Bspec vol 1c.5 - video engine command streamer:
1568 	 * "If ENABLED, all TLBs will be invalidated once the flush
1569 	 * operation is complete. This bit is only valid when the
1570 	 * Post-Sync Operation field is a value of 1h or 3h."
1571 	 */
1572 	if (invalidate & I915_GEM_GPU_DOMAINS)
1573 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1574 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1575 	intel_ring_emit(ring, cmd);
1576 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1577 	intel_ring_emit(ring, 0);
1578 	intel_ring_emit(ring, MI_NOOP);
1579 	intel_ring_advance(ring);
1580 	return 0;
1581 }
1582 
1583 static int
1584 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1585 			      u32 offset, u32 len,
1586 			      unsigned flags)
1587 {
1588 	int ret;
1589 
1590 	ret = intel_ring_begin(ring, 2);
1591 	if (ret)
1592 		return ret;
1593 
1594 	intel_ring_emit(ring,
1595 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1596 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1597 	/* bit0-7 is the length on GEN6+ */
1598 	intel_ring_emit(ring, offset);
1599 	intel_ring_advance(ring);
1600 
1601 	return 0;
1602 }
1603 
1604 static int
1605 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1606 			      u32 offset, u32 len,
1607 			      unsigned flags)
1608 {
1609 	int ret;
1610 
1611 	ret = intel_ring_begin(ring, 2);
1612 	if (ret)
1613 		return ret;
1614 
1615 	intel_ring_emit(ring,
1616 			MI_BATCH_BUFFER_START |
1617 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1618 	/* bit0-7 is the length on GEN6+ */
1619 	intel_ring_emit(ring, offset);
1620 	intel_ring_advance(ring);
1621 
1622 	return 0;
1623 }
1624 
1625 /* Blitter support (SandyBridge+) */
1626 
1627 static int blt_ring_flush(struct intel_ring_buffer *ring,
1628 			  u32 invalidate, u32 flush)
1629 {
1630 	uint32_t cmd;
1631 	int ret;
1632 
1633 	ret = intel_ring_begin(ring, 4);
1634 	if (ret)
1635 		return ret;
1636 
1637 	cmd = MI_FLUSH_DW;
1638 	/*
1639 	 * Bspec vol 1c.3 - blitter engine command streamer:
1640 	 * "If ENABLED, all TLBs will be invalidated once the flush
1641 	 * operation is complete. This bit is only valid when the
1642 	 * Post-Sync Operation field is a value of 1h or 3h."
1643 	 */
1644 	if (invalidate & I915_GEM_DOMAIN_RENDER)
1645 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1646 			MI_FLUSH_DW_OP_STOREDW;
1647 	intel_ring_emit(ring, cmd);
1648 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1649 	intel_ring_emit(ring, 0);
1650 	intel_ring_emit(ring, MI_NOOP);
1651 	intel_ring_advance(ring);
1652 	return 0;
1653 }
1654 
1655 int intel_init_render_ring_buffer(struct drm_device *dev)
1656 {
1657 	drm_i915_private_t *dev_priv = dev->dev_private;
1658 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1659 
1660 	ring->name = "render ring";
1661 	ring->id = RCS;
1662 	ring->mmio_base = RENDER_RING_BASE;
1663 
1664 	if (INTEL_INFO(dev)->gen >= 6) {
1665 		ring->add_request = gen6_add_request;
1666 		ring->flush = gen7_render_ring_flush;
1667 		if (INTEL_INFO(dev)->gen == 6)
1668 			ring->flush = gen6_render_ring_flush;
1669 		ring->irq_get = gen6_ring_get_irq;
1670 		ring->irq_put = gen6_ring_put_irq;
1671 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1672 		ring->get_seqno = gen6_ring_get_seqno;
1673 		ring->set_seqno = ring_set_seqno;
1674 		ring->sync_to = gen6_ring_sync;
1675 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1676 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1677 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1678 		ring->signal_mbox[0] = GEN6_VRSYNC;
1679 		ring->signal_mbox[1] = GEN6_BRSYNC;
1680 	} else if (IS_GEN5(dev)) {
1681 		ring->add_request = pc_render_add_request;
1682 		ring->flush = gen4_render_ring_flush;
1683 		ring->get_seqno = pc_render_get_seqno;
1684 		ring->set_seqno = pc_render_set_seqno;
1685 		ring->irq_get = gen5_ring_get_irq;
1686 		ring->irq_put = gen5_ring_put_irq;
1687 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1688 	} else {
1689 		ring->add_request = i9xx_add_request;
1690 		if (INTEL_INFO(dev)->gen < 4)
1691 			ring->flush = gen2_render_ring_flush;
1692 		else
1693 			ring->flush = gen4_render_ring_flush;
1694 		ring->get_seqno = ring_get_seqno;
1695 		ring->set_seqno = ring_set_seqno;
1696 		if (IS_GEN2(dev)) {
1697 			ring->irq_get = i8xx_ring_get_irq;
1698 			ring->irq_put = i8xx_ring_put_irq;
1699 		} else {
1700 			ring->irq_get = i9xx_ring_get_irq;
1701 			ring->irq_put = i9xx_ring_put_irq;
1702 		}
1703 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1704 	}
1705 	ring->write_tail = ring_write_tail;
1706 	if (IS_HASWELL(dev))
1707 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1708 	else if (INTEL_INFO(dev)->gen >= 6)
1709 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1710 	else if (INTEL_INFO(dev)->gen >= 4)
1711 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1712 	else if (IS_I830(dev) || IS_845G(dev))
1713 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1714 	else
1715 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1716 	ring->init = init_render_ring;
1717 	ring->cleanup = render_ring_cleanup;
1718 
1719 	/* Workaround batchbuffer to combat CS tlb bug. */
1720 	if (HAS_BROKEN_CS_TLB(dev)) {
1721 		struct drm_i915_gem_object *obj;
1722 		int ret;
1723 
1724 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1725 		if (obj == NULL) {
1726 			DRM_ERROR("Failed to allocate batch bo\n");
1727 			return -ENOMEM;
1728 		}
1729 
1730 		ret = i915_gem_object_pin(obj, 0, true, false);
1731 		if (ret != 0) {
1732 			drm_gem_object_unreference(&obj->base);
1733 			DRM_ERROR("Failed to ping batch bo\n");
1734 			return ret;
1735 		}
1736 
1737 		ring->private = obj;
1738 	}
1739 
1740 	return intel_init_ring_buffer(dev, ring);
1741 }
1742 
1743 int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1744 {
1745 	drm_i915_private_t *dev_priv = dev->dev_private;
1746 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1747 	int ret;
1748 
1749 	ring->name = "render ring";
1750 	ring->id = RCS;
1751 	ring->mmio_base = RENDER_RING_BASE;
1752 
1753 	if (INTEL_INFO(dev)->gen >= 6) {
1754 		/* non-kms not supported on gen6+ */
1755 		return -ENODEV;
1756 	}
1757 
1758 	/* Note: gem is not supported on gen5/ilk without kms (the corresponding
1759 	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up
1760 	 * the special gen5 functions. */
1761 	ring->add_request = i9xx_add_request;
1762 	if (INTEL_INFO(dev)->gen < 4)
1763 		ring->flush = gen2_render_ring_flush;
1764 	else
1765 		ring->flush = gen4_render_ring_flush;
1766 	ring->get_seqno = ring_get_seqno;
1767 	ring->set_seqno = ring_set_seqno;
1768 	if (IS_GEN2(dev)) {
1769 		ring->irq_get = i8xx_ring_get_irq;
1770 		ring->irq_put = i8xx_ring_put_irq;
1771 	} else {
1772 		ring->irq_get = i9xx_ring_get_irq;
1773 		ring->irq_put = i9xx_ring_put_irq;
1774 	}
1775 	ring->irq_enable_mask = I915_USER_INTERRUPT;
1776 	ring->write_tail = ring_write_tail;
1777 	if (INTEL_INFO(dev)->gen >= 4)
1778 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1779 	else if (IS_I830(dev) || IS_845G(dev))
1780 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1781 	else
1782 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1783 	ring->init = init_render_ring;
1784 	ring->cleanup = render_ring_cleanup;
1785 
1786 	ring->dev = dev;
1787 	INIT_LIST_HEAD(&ring->active_list);
1788 	INIT_LIST_HEAD(&ring->request_list);
1789 
1790 	ring->size = size;
1791 	ring->effective_size = ring->size;
1792 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1793 		ring->effective_size -= 128;
1794 
1795 	ring->virtual_start = ioremap_wc(start, size);
1796 	if (ring->virtual_start == NULL) {
1797 		DRM_ERROR("can not ioremap virtual address for"
1798 			  " ring buffer\n");
1799 		return -ENOMEM;
1800 	}
1801 
1802 	if (!I915_NEED_GFX_HWS(dev)) {
1803 		ret = init_phys_hws_pga(ring);
1804 		if (ret)
1805 			return ret;
1806 	}
1807 
1808 	return 0;
1809 }
1810 
1811 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1812 {
1813 	drm_i915_private_t *dev_priv = dev->dev_private;
1814 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1815 
1816 	ring->name = "bsd ring";
1817 	ring->id = VCS;
1818 
1819 	ring->write_tail = ring_write_tail;
1820 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1821 		ring->mmio_base = GEN6_BSD_RING_BASE;
1822 		/* gen6 bsd needs a special wa for tail updates */
1823 		if (IS_GEN6(dev))
1824 			ring->write_tail = gen6_bsd_ring_write_tail;
1825 		ring->flush = gen6_ring_flush;
1826 		ring->add_request = gen6_add_request;
1827 		ring->get_seqno = gen6_ring_get_seqno;
1828 		ring->set_seqno = ring_set_seqno;
1829 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1830 		ring->irq_get = gen6_ring_get_irq;
1831 		ring->irq_put = gen6_ring_put_irq;
1832 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1833 		ring->sync_to = gen6_ring_sync;
1834 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1835 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1836 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1837 		ring->signal_mbox[0] = GEN6_RVSYNC;
1838 		ring->signal_mbox[1] = GEN6_BVSYNC;
1839 	} else {
1840 		ring->mmio_base = BSD_RING_BASE;
1841 		ring->flush = bsd_ring_flush;
1842 		ring->add_request = i9xx_add_request;
1843 		ring->get_seqno = ring_get_seqno;
1844 		ring->set_seqno = ring_set_seqno;
1845 		if (IS_GEN5(dev)) {
1846 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1847 			ring->irq_get = gen5_ring_get_irq;
1848 			ring->irq_put = gen5_ring_put_irq;
1849 		} else {
1850 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1851 			ring->irq_get = i9xx_ring_get_irq;
1852 			ring->irq_put = i9xx_ring_put_irq;
1853 		}
1854 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1855 	}
1856 	ring->init = init_ring_common;
1857 
1858 	return intel_init_ring_buffer(dev, ring);
1859 }
1860 
1861 int intel_init_blt_ring_buffer(struct drm_device *dev)
1862 {
1863 	drm_i915_private_t *dev_priv = dev->dev_private;
1864 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1865 
1866 	ring->name = "blitter ring";
1867 	ring->id = BCS;
1868 
1869 	ring->mmio_base = BLT_RING_BASE;
1870 	ring->write_tail = ring_write_tail;
1871 	ring->flush = blt_ring_flush;
1872 	ring->add_request = gen6_add_request;
1873 	ring->get_seqno = gen6_ring_get_seqno;
1874 	ring->set_seqno = ring_set_seqno;
1875 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1876 	ring->irq_get = gen6_ring_get_irq;
1877 	ring->irq_put = gen6_ring_put_irq;
1878 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1879 	ring->sync_to = gen6_ring_sync;
1880 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1881 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1882 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1883 	ring->signal_mbox[0] = GEN6_RBSYNC;
1884 	ring->signal_mbox[1] = GEN6_VBSYNC;
1885 	ring->init = init_ring_common;
1886 
1887 	return intel_init_ring_buffer(dev, ring);
1888 }
1889 
1890 int
1891 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1892 {
1893 	int ret;
1894 
1895 	if (!ring->gpu_caches_dirty)
1896 		return 0;
1897 
1898 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1899 	if (ret)
1900 		return ret;
1901 
1902 	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
1903 
1904 	ring->gpu_caches_dirty = false;
1905 	return 0;
1906 }
1907 
1908 int
1909 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1910 {
1911 	uint32_t flush_domains;
1912 	int ret;
1913 
1914 	flush_domains = 0;
1915 	if (ring->gpu_caches_dirty)
1916 		flush_domains = I915_GEM_GPU_DOMAINS;
1917 
1918 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1919 	if (ret)
1920 		return ret;
1921 
1922 	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1923 
1924 	ring->gpu_caches_dirty = false;
1925 	return 0;
1926 }
1927