// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen6_engine_cs.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_ring.h"

#define HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH * sizeof(u32))

/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it. Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either. Notify enable is IRQs, which aren't
 * really our business. That leaves only stall at scoreboard.
 */
static int
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0; /* low dword */
	*cs++ = 0; /* high dword */
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(5);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

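/*
 * Full PIPE_CONTROL flush for the gen6 render ring: force the post-sync
 * nonzero workaround above first, then emit a single PIPE_CONTROL carrying
 * the flush and/or invalidate bits requested by @mode.
 */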
int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = gen6_emit_post_sync_nonzero_flush(rq);
	if (ret)
		return ret;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact. And when rearranging requests, the order of flushes is
	 * unknown.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

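/*
 * Breadcrumb for the gen6 render ring: apply the post-sync nonzero
 * workaround inline, then flush the render and depth caches while writing
 * rq->fence.seqno to the request's active seqno address, and finally raise
 * MI_USER_INTERRUPT.
 */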
u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_QW_WRITE;
	*cs++ = intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = 0;

	/* Finally we can flush and with it emit the breadcrumb */
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

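/*
 * Emit an MI_FLUSH_DW with a post-sync dword write to the scratch slot in
 * the hardware status page, so that later commands (such as the breadcrumb
 * interrupt) are ordered against the flushed write cache.
 */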
static int mi_flush_dw(struct i915_request *rq, u32 flags)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	cmd |= flags;

	*cs++ = cmd;
	*cs++ = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

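/*
 * Translate the generic EMIT_INVALIDATE request into engine-specific
 * MI_FLUSH_DW invalidation flags; the xcs/vcs wrappers below differ only
 * in which TLB/BSD invalidate bits they pass.
 */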
static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
{
	return mi_flush_dw(rq, mode & EMIT_INVALIDATE ? invflags : 0);
}

int gen6_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}

int gen6_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
}

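/*
 * Start a batch buffer on gen6: run it with the non-secure bit set unless
 * the caller requested a secure dispatch (I915_DISPATCH_SECURE).
 */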
int gen6_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

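/*
 * Haswell variant of the above: non-secure batches additionally select
 * the PPGTT address space (MI_BATCH_PPGTT_HSW).
 */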
int
hsw_emit_bb_start(struct i915_request *rq,
		  u64 offset, u32 len,
		  unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = __gen6_emit_bb_start(cs, offset, security);
	intel_ring_advance(rq, cs);

	return 0;
}

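/*
 * Emit a bare CS-stall PIPE_CONTROL (stall at scoreboard, no flushes),
 * used as the required precursor to a state-cache-invalidating
 * PIPE_CONTROL on gen7.
 */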
static int gen7_stall_cs(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD;
	*cs++ = 0;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

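/*
 * Full PIPE_CONTROL flush for the gen7 render ring. CS_STALL and a
 * post-sync QW write are always set; flush and invalidate bits are added
 * according to @mode, with the CS-stall workaround emitted first when
 * invalidating.
 */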
int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 scratch_addr =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
	u32 *cs, flags = 0;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/*
	 * CS_STALL suggests at least a post-sync write.
	 */
	flags |= PIPE_CONTROL_QW_WRITE;
	flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

	/*
	 * Just flush everything. Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}
	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;

		/*
		 * Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set.
		 */
		gen7_stall_cs(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = flags;
	*cs++ = scratch_addr;
	*cs++ = 0;
	intel_ring_advance(rq, cs);

	return 0;
}

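/*
 * Breadcrumb for the gen7 render ring: a single flushing PIPE_CONTROL that
 * writes rq->fence.seqno to the request's active seqno address, followed
 * by MI_USER_INTERRUPT.
 */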
u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	*cs++ = GFX_OP_PIPE_CONTROL(4);
	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_GLOBAL_GTT_IVB |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_seqno(rq);
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

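/*
 * Breadcrumb for the gen6 non-render rings: an MI_FLUSH_DW with a
 * post-sync store of the seqno into the status page, then
 * MI_USER_INTERRUPT.
 */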
u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

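/*
 * Breadcrumb for the gen7 non-render rings. After the flushing seqno
 * store, the seqno is rewritten GEN7_XCS_WA more times with
 * MI_STORE_DWORD_INDEX and followed by an empty MI_FLUSH_DW; the repeated
 * stores appear to serve as a workaround to make sure the seqno write has
 * landed in the status page before the user interrupt fires.
 */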
#define GEN7_XCS_WA 32
u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	int i;

	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = rq->fence.seqno;

	for (i = 0; i < GEN7_XCS_WA; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_FLUSH_DW;
	*cs++ = 0;
	*cs++ = 0;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_NOOP;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}
#undef GEN7_XCS_WA

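/*
 * gen6 ring interrupts: unmask the engine's bits in RING_IMR (with a
 * posting read so that write lands before the GT IMR is touched), then
 * unmask them at the GT level; gen6_irq_disable() reverses this, leaving
 * only irq_keep_mask unmasked.
 */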
void gen6_irq_enable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR,
		     ~(engine->irq_enable_mask | engine->irq_keep_mask));

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen6_irq_disable(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}

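/*
 * Haswell VEBOX interrupts are routed through the GT PM interrupt mask
 * rather than the GT IMR, so unmask/mask there after updating RING_IMR.
 */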
void hsw_irq_enable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_enable_mask);

	/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
	ENGINE_POSTING_READ(engine, RING_IMR);

	gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
}

void hsw_irq_disable_vecs(struct intel_engine_cs *engine)
{
	ENGINE_WRITE(engine, RING_IMR, ~0);
	gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
}