xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_lrc.c (revision 596b68695fdf626a81565d499a5929be2765afa1)
15ca02815Sjsg // SPDX-License-Identifier: MIT
2c349dbc7Sjsg /*
3c349dbc7Sjsg  * Copyright © 2014 Intel Corporation
4c349dbc7Sjsg  */
5c349dbc7Sjsg 
65ca02815Sjsg #include "gem/i915_gem_lmem.h"
7c349dbc7Sjsg 
85ca02815Sjsg #include "gen8_engine_cs.h"
9c349dbc7Sjsg #include "i915_drv.h"
10c349dbc7Sjsg #include "i915_perf.h"
111bb76ff1Sjsg #include "i915_reg.h"
121bb76ff1Sjsg #include "intel_context.h"
135ca02815Sjsg #include "intel_engine.h"
141bb76ff1Sjsg #include "intel_engine_regs.h"
155ca02815Sjsg #include "intel_gpu_commands.h"
16c349dbc7Sjsg #include "intel_gt.h"
171bb76ff1Sjsg #include "intel_gt_regs.h"
185ca02815Sjsg #include "intel_lrc.h"
19c349dbc7Sjsg #include "intel_lrc_reg.h"
20c349dbc7Sjsg #include "intel_ring.h"
21ad8b1aafSjsg #include "shmem_utils.h"
22c349dbc7Sjsg 
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: When creating a MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs, or number of registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes
 *      that follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	/* A zero byte (END) terminates the encoded table. */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			/* NOP token: advance 'count' dwords without writing. */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI token: [5:0] register count, [7:6] flags (POSTED). */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			/*
			 * Variable-length offset: while bit 7 is set, another
			 * 7-bit chunk follows (most-significant chunks first).
			 */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Offsets are encoded in dwords; values stay unfilled. */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}
105c349dbc7Sjsg 
/*
 * Context-image register layout for Gen8 non-render (xCS) engines,
 * encoded with the NOP/LRI/REG/REG16 helpers and decoded by set_offsets().
 */
static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};
140c349dbc7Sjsg 
/*
 * Context-image register layout for Gen9-11 non-render (xCS) engines;
 * decoded by set_offsets().
 */
static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};
224c349dbc7Sjsg 
/*
 * Context-image register layout for Gen12 non-render (xCS) engines;
 * decoded by set_offsets().
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
256c349dbc7Sjsg 
/*
 * Context-image register layout for DG2 (graphics 12.55+) non-render
 * engines; decoded by set_offsets().
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
2901bb76ff1Sjsg 
/*
 * Context-image register layout for the Gen8 render engine;
 * decoded by set_offsets().
 */
static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};
327c349dbc7Sjsg 
/*
 * Context-image register layout for Gen9/10 render engines;
 * decoded by set_offsets().
 */
static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};
411c349dbc7Sjsg 
/*
 * Context-image register layout for the Gen11 render engine;
 * decoded by set_offsets().
 */
static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};
452c349dbc7Sjsg 
/*
 * Context-image register layout for the Gen12 render engine;
 * decoded by set_offsets().
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};
5485ca02815Sjsg 
/*
 * Context-image register layout for Xe_HP (graphics 12.50+) render
 * engines; decoded by set_offsets().
 */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
589c349dbc7Sjsg 
/*
 * Context-image register layout for DG2 (graphics 12.55+) render
 * engines; decoded by set_offsets().
 */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
6321bb76ff1Sjsg 
/*
 * Context-image register layout for Meteor Lake (graphics 12.70+)
 * render engines; decoded by set_offsets().
 */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
675f005ef32Sjsg 
/* The encoding helpers above are local to the offset tables. */
#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP
681c349dbc7Sjsg 
reg_offsets(const struct intel_engine_cs * engine)682c349dbc7Sjsg static const u8 *reg_offsets(const struct intel_engine_cs *engine)
683c349dbc7Sjsg {
684c349dbc7Sjsg 	/*
685c349dbc7Sjsg 	 * The gen12+ lists only have the registers we program in the basic
686c349dbc7Sjsg 	 * default state. We rely on the context image using relative
687c349dbc7Sjsg 	 * addressing to automatic fixup the register state between the
688c349dbc7Sjsg 	 * physical engines for virtual engine.
689c349dbc7Sjsg 	 */
6905ca02815Sjsg 	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
691c349dbc7Sjsg 		   !intel_engine_has_relative_mmio(engine));
692c349dbc7Sjsg 
6931bb76ff1Sjsg 	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
694f005ef32Sjsg 		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
695f005ef32Sjsg 			return mtl_rcs_offsets;
696f005ef32Sjsg 		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
6971bb76ff1Sjsg 			return dg2_rcs_offsets;
6981bb76ff1Sjsg 		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
6995ca02815Sjsg 			return xehp_rcs_offsets;
7005ca02815Sjsg 		else if (GRAPHICS_VER(engine->i915) >= 12)
701c349dbc7Sjsg 			return gen12_rcs_offsets;
7025ca02815Sjsg 		else if (GRAPHICS_VER(engine->i915) >= 11)
703c349dbc7Sjsg 			return gen11_rcs_offsets;
7045ca02815Sjsg 		else if (GRAPHICS_VER(engine->i915) >= 9)
705c349dbc7Sjsg 			return gen9_rcs_offsets;
706c349dbc7Sjsg 		else
707c349dbc7Sjsg 			return gen8_rcs_offsets;
708c349dbc7Sjsg 	} else {
7091bb76ff1Sjsg 		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
7101bb76ff1Sjsg 			return dg2_xcs_offsets;
7111bb76ff1Sjsg 		else if (GRAPHICS_VER(engine->i915) >= 12)
712c349dbc7Sjsg 			return gen12_xcs_offsets;
7135ca02815Sjsg 		else if (GRAPHICS_VER(engine->i915) >= 9)
714c349dbc7Sjsg 			return gen9_xcs_offsets;
715c349dbc7Sjsg 		else
716c349dbc7Sjsg 			return gen8_xcs_offsets;
717c349dbc7Sjsg 	}
718c349dbc7Sjsg }
719c349dbc7Sjsg 
lrc_ring_mi_mode(const struct intel_engine_cs * engine)7205ca02815Sjsg static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
721c349dbc7Sjsg {
7225ca02815Sjsg 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7235ca02815Sjsg 		return 0x70;
7245ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 12)
7255ca02815Sjsg 		return 0x60;
7265ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 9)
7275ca02815Sjsg 		return 0x54;
7285ca02815Sjsg 	else if (engine->class == RENDER_CLASS)
7295ca02815Sjsg 		return 0x58;
7305ca02815Sjsg 	else
7315ca02815Sjsg 		return -1;
732c349dbc7Sjsg }
733c349dbc7Sjsg 
lrc_ring_bb_offset(const struct intel_engine_cs * engine)7341bb76ff1Sjsg static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
7351bb76ff1Sjsg {
7361bb76ff1Sjsg 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7371bb76ff1Sjsg 		return 0x80;
7381bb76ff1Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 12)
7391bb76ff1Sjsg 		return 0x70;
7401bb76ff1Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 9)
7411bb76ff1Sjsg 		return 0x64;
7421bb76ff1Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 8 &&
7431bb76ff1Sjsg 		 engine->class == RENDER_CLASS)
7441bb76ff1Sjsg 		return 0xc4;
7451bb76ff1Sjsg 	else
7461bb76ff1Sjsg 		return -1;
7471bb76ff1Sjsg }
7481bb76ff1Sjsg 
lrc_ring_gpr0(const struct intel_engine_cs * engine)7495ca02815Sjsg static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
750c349dbc7Sjsg {
7515ca02815Sjsg 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
7525ca02815Sjsg 		return 0x84;
7535ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 12)
7545ca02815Sjsg 		return 0x74;
7555ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 9)
7565ca02815Sjsg 		return 0x68;
7575ca02815Sjsg 	else if (engine->class == RENDER_CLASS)
7585ca02815Sjsg 		return 0xd8;
7595ca02815Sjsg 	else
7605ca02815Sjsg 		return -1;
761c349dbc7Sjsg }
762c349dbc7Sjsg 
lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs * engine)7635ca02815Sjsg static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
764c349dbc7Sjsg {
7655ca02815Sjsg 	if (GRAPHICS_VER(engine->i915) >= 12)
7665ca02815Sjsg 		return 0x12;
7675ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
7685ca02815Sjsg 		return 0x18;
7695ca02815Sjsg 	else
7705ca02815Sjsg 		return -1;
771c349dbc7Sjsg }
772c349dbc7Sjsg 
/*
 * Dword index of the INDIRECT_CTX pointer slot: it sits two dwords past
 * the per-context wa_bb slot. Returns a negative value when unsupported.
 */
static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int wa_bb = lrc_ring_wa_bb_per_ctx(engine);

	return wa_bb < 0 ? wa_bb : wa_bb + 2;
}
783c349dbc7Sjsg 
/*
 * Dword index of the INDIRECT_CTX offset slot: it sits two dwords past
 * the INDIRECT_CTX pointer slot. Returns a negative value when unsupported.
 */
static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int ptr = lrc_ring_indirect_ptr(engine);

	return ptr < 0 ? ptr : ptr + 2;
}
794c349dbc7Sjsg 
lrc_ring_cmd_buf_cctl(const struct intel_engine_cs * engine)7955ca02815Sjsg static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
796c349dbc7Sjsg {
797c349dbc7Sjsg 
7985ca02815Sjsg 	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
799c349dbc7Sjsg 		/*
8005ca02815Sjsg 		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
8015ca02815Sjsg 		 * simply to match the RCS context image layout.
802c349dbc7Sjsg 		 */
8035ca02815Sjsg 		return 0xc6;
8045ca02815Sjsg 	else if (engine->class != RENDER_CLASS)
8055ca02815Sjsg 		return -1;
8065ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 12)
8075ca02815Sjsg 		return 0xb6;
8085ca02815Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 11)
8095ca02815Sjsg 		return 0xaa;
810c349dbc7Sjsg 	else
8115ca02815Sjsg 		return -1;
812c349dbc7Sjsg }
813c349dbc7Sjsg 
8145ca02815Sjsg static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs * engine)8155ca02815Sjsg lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
816c349dbc7Sjsg {
817f005ef32Sjsg 	if (GRAPHICS_VER(engine->i915) >= 12)
8185ca02815Sjsg 		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
819f005ef32Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 11)
8205ca02815Sjsg 		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
821f005ef32Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 9)
8225ca02815Sjsg 		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
823f005ef32Sjsg 	else if (GRAPHICS_VER(engine->i915) >= 8)
8245ca02815Sjsg 		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
825f005ef32Sjsg 
826f005ef32Sjsg 	GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
827f005ef32Sjsg 
828f005ef32Sjsg 	return 0;
829c349dbc7Sjsg }
830c349dbc7Sjsg 
831c349dbc7Sjsg static void
lrc_setup_indirect_ctx(u32 * regs,const struct intel_engine_cs * engine,u32 ctx_bb_ggtt_addr,u32 size)8325ca02815Sjsg lrc_setup_indirect_ctx(u32 *regs,
8335ca02815Sjsg 		       const struct intel_engine_cs *engine,
8345ca02815Sjsg 		       u32 ctx_bb_ggtt_addr,
8355ca02815Sjsg 		       u32 size)
836c349dbc7Sjsg {
8375ca02815Sjsg 	GEM_BUG_ON(!size);
8385ca02815Sjsg 	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
8395ca02815Sjsg 	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
8405ca02815Sjsg 	regs[lrc_ring_indirect_ptr(engine) + 1] =
8415ca02815Sjsg 		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
842c349dbc7Sjsg 
8435ca02815Sjsg 	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
8445ca02815Sjsg 	regs[lrc_ring_indirect_offset(engine) + 1] =
8455ca02815Sjsg 		lrc_ring_indirect_offset_default(engine) << 6;
846c349dbc7Sjsg }
847c349dbc7Sjsg 
/*
 * Fill the generation-independent part of the context image: the
 * CTX_CONTEXT_CONTROL word, the saved timestamp, and (where the image
 * has a slot for it) a zeroed batch-buffer offset.
 */
static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	int bb_offset;
	u32 ctx_ctrl;

	ctx_ctrl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
		   _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctx_ctrl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctx_ctrl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
						CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctx_ctrl;

	/* Resume accumulating runtime from where the context left off. */
	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	bb_offset = lrc_ring_bb_offset(engine);
	if (bb_offset != -1)
		regs[bb_offset + 1] = 0;
}
871c349dbc7Sjsg 
/*
 * Wire the engine's workaround batch buffers (per-context and
 * indirect-context) into the context image, when present.
 */
static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
	u32 ggtt_base;

	if (!wa_ctx->per_ctx.size && !wa_ctx->indirect_ctx.size)
		return;

	/* Both wa_bbs live in the same vma; resolve its GGTT address once. */
	ggtt_base = i915_ggtt_offset(wa_ctx->vma);

	if (wa_ctx->per_ctx.size) {
		int wa_bb = lrc_ring_wa_bb_per_ctx(engine);

		GEM_BUG_ON(wa_bb == -1);
		regs[wa_bb + 1] = (ggtt_base + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size)
		lrc_setup_indirect_ctx(regs, engine,
				       ggtt_base + wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
}
892c349dbc7Sjsg 
/*
 * Program the page-directory pointers of the context image from @ppgtt.
 */
static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 32b PPGTT: all four PDP descriptors are used. */
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
		return;
	}

	/* 64b PPGTT (48bit canonical)
	 * PDP0_DESCRIPTOR contains the base address to PML4 and
	 * other PDP Descriptors are ignored.
	 */
	ASSIGN_CTX_PML4(ppgtt, regs);
}
908c349dbc7Sjsg 
vm_alias(struct i915_address_space * vm)9095ca02815Sjsg static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
910c349dbc7Sjsg {
9115ca02815Sjsg 	if (i915_is_ggtt(vm))
9125ca02815Sjsg 		return i915_vm_to_ggtt(vm)->alias;
913ad8b1aafSjsg 	else
9145ca02815Sjsg 		return i915_vm_to_ppgtt(vm);
915c349dbc7Sjsg }
916c349dbc7Sjsg 
/*
 * Clear STOP_RING in the context image's RING_MI_MODE slot.
 *
 * RING_MI_MODE is a masked register: the upper 16 bits select which of
 * the lower bits are written, so STOP_RING << 16 arms the write that
 * clears STOP_RING itself.  Skipped when the engine has no MI_MODE slot
 * (lrc_ring_mi_mode() == -1).
 */
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}
927c349dbc7Sjsg 
/*
 * Populate the register-state page of a context image.
 *
 * @regs points at the start of the register state (LRC_STATE_OFFSET into
 * the image).  When @inhibit is set the whole page is first zeroed so the
 * context starts from a clean slate; otherwise the existing contents
 * (from the default state) are overlaid.
 */
static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}
956c349dbc7Sjsg 
/*
 * Initialise the register state of an already-pinned context, using the
 * CPU mapping cached in ce->lrc_reg_state.
 */
void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}
963c349dbc7Sjsg 
/*
 * After a reset, re-arm the masked write that clears STOP_RING in the
 * context image (see __reset_stop_ring()).
 */
void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}
969c349dbc7Sjsg 
/*
 * Fill the guard page that follows the context image with a known
 * pattern (CONTEXT_REDZONE) so overruns can be detected later by
 * check_redzone().  Debug builds only (CONFIG_DRM_I915_DEBUG_GEM).
 */
static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}
980c349dbc7Sjsg 
/*
 * Verify the redzone written by set_redzone() is intact; log once per
 * engine if the context spilled past its nominal size.  Debug builds
 * only (CONFIG_DRM_I915_DEBUG_GEM).
 */
static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}
994c349dbc7Sjsg 
context_wa_bb_offset(const struct intel_context * ce)9951bb76ff1Sjsg static u32 context_wa_bb_offset(const struct intel_context *ce)
9961bb76ff1Sjsg {
9971bb76ff1Sjsg 	return PAGE_SIZE * ce->wa_bb_page;
9981bb76ff1Sjsg }
9991bb76ff1Sjsg 
context_indirect_bb(const struct intel_context * ce)10001bb76ff1Sjsg static u32 *context_indirect_bb(const struct intel_context *ce)
10011bb76ff1Sjsg {
10021bb76ff1Sjsg 	void *ptr;
10031bb76ff1Sjsg 
10041bb76ff1Sjsg 	GEM_BUG_ON(!ce->wa_bb_page);
10051bb76ff1Sjsg 
10061bb76ff1Sjsg 	ptr = ce->lrc_reg_state;
10071bb76ff1Sjsg 	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
10081bb76ff1Sjsg 	ptr += context_wa_bb_offset(ce);
10091bb76ff1Sjsg 
10101bb76ff1Sjsg 	return ptr;
10111bb76ff1Sjsg }
10121bb76ff1Sjsg 
/*
 * Initialise the full CPU-visible context image @state.
 *
 * Starts from the engine's golden default state when one exists (marking
 * the context valid and allowing the register page to be overlaid rather
 * than zeroed), then clears the ppHWSP and any wa_bb page before writing
 * the register state.  The OpenBSD build reads the default state from a
 * uvm aobj (uao_read) instead of Linux shmem.
 */
void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
#ifdef __linux__
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
#else
		uao_read(engine->default_state, 0,
			   state, engine->context_size);
#endif
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/* Clear the indirect wa and storage */
	if (ce->wa_bb_page)
		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}
10465ca02815Sjsg 
/* GGTT address of the context's indirect workaround batch page. */
u32 lrc_indirect_bb(const struct intel_context *ce)
{
	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}
10511bb76ff1Sjsg 
/*
 * Emit the DG2 predicate-disable workaround batch at @cs.
 *
 * Writes the predicate-result dword in the wa_bb page, terminates with a
 * predicated MI_BATCH_BUFFER_END, disables predication, re-enables it for
 * the next batch, and ends.  Must fit below DG2_PREDICATE_RESULT_WA
 * within the page (enforced by the GEM_BUG_ON).
 */
static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}
10751bb76ff1Sjsg 
10765ca02815Sjsg static struct i915_vma *
__lrc_alloc_state(struct intel_context * ce,struct intel_engine_cs * engine)10775ca02815Sjsg __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
10785ca02815Sjsg {
10795ca02815Sjsg 	struct drm_i915_gem_object *obj;
10805ca02815Sjsg 	struct i915_vma *vma;
10815ca02815Sjsg 	u32 context_size;
10825ca02815Sjsg 
10835ca02815Sjsg 	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
10845ca02815Sjsg 
10855ca02815Sjsg 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
10865ca02815Sjsg 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
10875ca02815Sjsg 
1088f005ef32Sjsg 	if (GRAPHICS_VER(engine->i915) >= 12) {
10895ca02815Sjsg 		ce->wa_bb_page = context_size / PAGE_SIZE;
10905ca02815Sjsg 		context_size += PAGE_SIZE;
10915ca02815Sjsg 	}
10925ca02815Sjsg 
10931bb76ff1Sjsg 	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
10941bb76ff1Sjsg 		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
10951bb76ff1Sjsg 		context_size += PARENT_SCRATCH_SIZE;
10961bb76ff1Sjsg 	}
10971bb76ff1Sjsg 
10981bb76ff1Sjsg 	obj = i915_gem_object_create_lmem(engine->i915, context_size,
10991bb76ff1Sjsg 					  I915_BO_ALLOC_PM_VOLATILE);
1100f005ef32Sjsg 	if (IS_ERR(obj)) {
11015ca02815Sjsg 		obj = i915_gem_object_create_shmem(engine->i915, context_size);
11025ca02815Sjsg 		if (IS_ERR(obj))
11035ca02815Sjsg 			return ERR_CAST(obj);
11045ca02815Sjsg 
1105f005ef32Sjsg 		/*
1106f005ef32Sjsg 		 * Wa_22016122933: For Media version 13.0, all Media GT shared
1107f005ef32Sjsg 		 * memory needs to be mapped as WC on CPU side and UC (PAT
1108f005ef32Sjsg 		 * index 2) on GPU side.
1109f005ef32Sjsg 		 */
1110f005ef32Sjsg 		if (intel_gt_needs_wa_22016122933(engine->gt))
1111f005ef32Sjsg 			i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
1112f005ef32Sjsg 	}
1113f005ef32Sjsg 
11145ca02815Sjsg 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
11155ca02815Sjsg 	if (IS_ERR(vma)) {
11165ca02815Sjsg 		i915_gem_object_put(obj);
11175ca02815Sjsg 		return vma;
11185ca02815Sjsg 	}
11195ca02815Sjsg 
11205ca02815Sjsg 	return vma;
11215ca02815Sjsg }
11225ca02815Sjsg 
/*
 * Replace a pre-attached (pinned) timeline with one created on @engine,
 * preserving the flag bits packed into the old pointer.  The original
 * ce->timeline pointer is consumed (fetch_and_zero).
 */
static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}
11305ca02815Sjsg 
/*
 * Allocate everything a logical ring context needs: the state vma, the
 * ring buffer, and (if not already attached) a timeline.
 *
 * On success ce->ring and ce->state are populated and 0 is returned.
 * On failure the partially-acquired resources are released in reverse
 * order via the goto cleanup chain and a negative errno is returned.
 */
int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}
11795ca02815Sjsg 
/*
 * Scrub and reinitialise a pinned context after a reset: rewind the ring
 * to its emit pointer, rebuild the register state from scratch (inhibit),
 * and recompute the context descriptor with a forced restore.
 */
void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}
11905ca02815Sjsg 
/*
 * Map the context state object into the CPU's address space ahead of
 * pinning, storing the mapping in *@vaddr.  The map type is chosen for
 * GT coherency; I915_MAP_OVERRIDE permits replacing an existing mapping.
 *
 * Returns 0 on success or a negative errno from the pin_map attempt.
 */
int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 intel_gt_coherent_map_type(ce->engine->gt,
								    ce->state->obj,
								    false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}
12085ca02815Sjsg 
/*
 * Complete pinning: cache the register-state pointer, build the context
 * image on first pin (CONTEXT_INIT_BIT guards one-time init), and
 * refresh the descriptor/ring registers.  Always succeeds (returns 0).
 */
int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}
12225ca02815Sjsg 
/*
 * Unpin hook: drop any lingering reference to the last parallel request
 * and verify the context image's redzone is untouched (debug builds).
 */
void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}
1232c349dbc7Sjsg 
/* Release the CPU mapping taken in lrc_pre_pin(). */
void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}
1237c349dbc7Sjsg 
lrc_fini(struct intel_context * ce)12385ca02815Sjsg void lrc_fini(struct intel_context *ce)
12395ca02815Sjsg {
12405ca02815Sjsg 	if (!ce->state)
12415ca02815Sjsg 		return;
12425ca02815Sjsg 
12435ca02815Sjsg 	intel_ring_put(fetch_and_zero(&ce->ring));
12445ca02815Sjsg 	i915_vma_put(fetch_and_zero(&ce->state));
12455ca02815Sjsg }
12465ca02815Sjsg 
/*
 * Final kref release callback for a context: tear down its LRC
 * resources, then finalise and free the context itself.  The context
 * must already be idle and unpinned (asserted).
 */
void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}
12595ca02815Sjsg 
/*
 * Gen12 timestamp workaround: load the saved CTX_TIMESTAMP dword from
 * the context image into CS GPR0, then copy GPR0 back into
 * RING_CTX_TIMESTAMP twice via register-to-register loads.
 */
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}
1285ad8b1aafSjsg 
/*
 * Restore CS GPR0 from its saved slot in the context image, undoing the
 * scratch use made by the other gen12 indirect-ctx workarounds.
 */
static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}
1301ad8b1aafSjsg 
/*
 * Gen12 command-buffer control workaround: load the saved CMD_BUF_CCTL
 * value from the context image into CS GPR0, then copy it into
 * RING_CMD_BUF_CCTL.
 */
static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}
1323ad8b1aafSjsg 
/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF.  However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly.  All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
	*cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

	return cs;
}
13401bb76ff1Sjsg 
/*
 * Build the render-class indirect-context batch for gen12+: timestamp,
 * CMD_BUF_CCTL and scratch-restore workarounds, then platform/stepping
 * gated extras (DG2-G11 instruction-cache invalidate, aux-table
 * invalidation, and the draw-watermark setting for DG2 and early
 * Xe_LPG steppings).
 */
static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	cs = gen12_emit_aux_table_inv(ce->engine, cs);

	/* Wa_16014892111 */
	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(ce->engine->i915))
		cs = dg2_emit_draw_watermark_setting(cs);

	return cs;
}
1362ad8b1aafSjsg 
/*
 * Build the non-render (xcs) indirect-context batch for gen12+:
 * timestamp and scratch-restore workarounds, a DG2-G11 compute-class
 * instruction-cache invalidate, and finally aux-table invalidation.
 */
static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		if (ce->engine->class == COMPUTE_CLASS)
			cs = gen8_emit_pipe_control(cs,
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	return gen12_emit_aux_table_inv(ce->engine, cs);
}
1378ad8b1aafSjsg 
/*
 * Emit the per-context indirect batch into the context's wa_bb page via
 * @emit, pad it to a cacheline with MI_NOOPs, append the DG2 predicate
 * workaround at its fixed offset, and register the batch in the context
 * image.
 */
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       lrc_indirect_bb(ce),
			       (cs - start) * sizeof(*cs));
}
1399ad8b1aafSjsg 
/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48:53:    engine instance
 *      bit 54:        mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit 38:        mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
	u32 desc;

	/* addressing mode depends on the vm's page-table depth */
	desc = INTEL_LEGACY_32B_CONTEXT;
	if (i915_vm_is_4lvl(ce->vm))
		desc = INTEL_LEGACY_64B_CONTEXT;
	desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

	desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
	/* L3LLC coherence flag only exists on gen8 */
	if (GRAPHICS_VER(ce->vm->i915) == 8)
		desc |= GEN8_CTX_L3LLC_COHERENT;

	return i915_ggtt_offset(ce->state) | desc;
}
14495ca02815Sjsg 
/*
 * Refresh the volatile parts of the register state before submission:
 * ring start/head/tail/ctl, render power-clock state and OA registers
 * for render engines, and (when a wa_bb page exists) the per-context
 * indirect batch.  Returns the context descriptor with
 * CTX_DESC_FORCE_RESTORE set.
 */
u32 lrc_update_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    u32 head)
{
	struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;

	GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
	GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
	regs[CTX_RING_HEAD] = head;
	regs[CTX_RING_TAIL] = ring->tail;
	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

	/* RPCS */
	if (engine->class == RENDER_CLASS) {
		regs[CTX_R_PWR_CLK_STATE] =
			intel_sseu_make_rpcs(engine->gt, &ce->sseu);

		i915_oa_init_reg_state(ce, engine);
	}

	if (ce->wa_bb_page) {
		u32 *(*fn)(const struct intel_context *ce, u32 *cs);

		fn = gen12_emit_indirect_ctx_xcs;
		if (ce->engine->class == RENDER_CLASS)
			fn = gen12_emit_indirect_ctx_rcs;

		/* Mutually exclusive wrt to global indirect bb */
		GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
		setup_indirect_ctx_bb(ce, engine, fn);
	}

	return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}
1487c349dbc7Sjsg 
/* Rewrite the MMIO register offsets in the context image for @engine. */
void lrc_update_offsets(struct intel_context *ce,
			struct intel_engine_cs *engine)
{
	set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}
1493ad8b1aafSjsg 
/*
 * Sanity-check (and repair) the critical ring registers in the context
 * image around submission: RING_START, RING_CTL and the STOP_RING bit in
 * RING_MI_MODE.  Any mismatch is logged, fixed up in place, and reported
 * once via WARN_ONCE with @when ("before"/"after" style tag).
 */
void lrc_check_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine,
		    const char *when)
{
	const struct intel_ring *ring = ce->ring;
	u32 *regs = ce->lrc_reg_state;
	bool valid = true;
	int x;

	if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
		pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_START],
		       i915_ggtt_offset(ring->vma));
		regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
		valid = false;
	}

	/* ignore the transient WAIT bits when comparing RING_CTL */
	if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
	    (RING_CTL_SIZE(ring->size) | RING_VALID)) {
		pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
		       engine->name,
		       regs[CTX_RING_CTL],
		       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
		regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
		valid = false;
	}

	x = lrc_ring_mi_mode(engine);
	if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
		pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
		       engine->name, regs[x + 1]);
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
		valid = false;
	}

	WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}
1533c349dbc7Sjsg 
/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
 * but there is a slight complication as this is applied in WA batch where the
 * values are only initialized once so we cannot take register value at the
 * beginning and reuse it further; hence we save its value to memory, upload a
 * constant value with bit21 set and then we restore it back with the saved value.
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch in non-premptible. We can ofcourse
 * use additional instructions that read the actual value of the register
 * at that time and set our bit of interest but it makes the WA complicated.
 *
 * This WA is also required for Gen9 so extracting as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
	/* NB no one else is allowed to scribble over scratch + 256! */
	/* save current L3SQCREG4 to the GT scratch page */
	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	/* load the default value with the coherent-lines flush bit set */
	*batch++ = MI_LOAD_REGISTER_IMM(1);
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

	batch = gen8_emit_pipe_control(batch,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_DC_FLUSH_ENABLE,
				       0);

	/* restore the saved L3SQCREG4 value from scratch */
	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
	*batch++ = intel_gt_scratch_offset(engine->gt,
					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
	*batch++ = 0;

	return batch;
}
1577c349dbc7Sjsg 
/*
 * Typically we only have one indirect_ctx and one per_ctx batch buffer which
 * are initialized at the beginning and shared across all contexts, but this
 * field helps us to have multiple batches at different offsets and select
 * them based on a criterion. At the moment each batch always starts at the
 * beginning of the page and we do not have multiple wa_ctx batch buffers.
 *
 * The number of WAs applied is not known at the beginning; we use this field
 * to return the number of DWORDs written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END,
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the perctx batch, and the two of them
 * together make a complete batch buffer.
 */
/* Build the gen8 (bdw/chv) indirect-context workaround batch at @cs. */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *cs)
{
	/* WaDisableCtxRestoreArbitration:bdw,chv */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
	if (IS_BROADWELL(engine->i915))
		cs = gen8_emit_flush_coherentl3_wa(engine, cs);

	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
	/* Actual scratch location is at 128 bytes offset */
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_FLUSH_L3 |
				    PIPE_CONTROL_STORE_DATA_INDEX |
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_QW_WRITE,
				    LRC_PPHWSP_SCRATCH_ADDR);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while (!IS_ALIGNED((unsigned long)cs, CACHELINE_BYTES))
		*cs++ = MI_NOOP;

	/*
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
	 * execution depends on the length specified in terms of cache lines
	 * in the register CTX_RCS_INDIRECT_CTX
	 */

	return cs;
}
1625c349dbc7Sjsg 
/* One register/value pair to be emitted via MI_LOAD_REGISTER_IMM. */
struct lri {
	i915_reg_t reg;	/* mmio register to load */
	u32 value;	/* value written to the register */
};
1630c349dbc7Sjsg 
/*
 * Emit a single MI_LOAD_REGISTER_IMM loading @count register/value pairs
 * from @lri, followed by a NOOP to keep the command stream qword-aligned.
 * Returns the advanced batch pointer.
 */
static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
	unsigned int i;

	/* MI_LOAD_REGISTER_IMM can encode at most 63 register writes */
	GEM_BUG_ON(!count || count > 63);

	*batch++ = MI_LOAD_REGISTER_IMM(count);
	for (i = 0; i < count; i++) {
		*batch++ = i915_mmio_reg_offset(lri[i].reg);
		*batch++ = lri[i].value;
	}
	*batch++ = MI_NOOP;

	return batch;
}
1644c349dbc7Sjsg 
/* Build the gen9 (skl/bxt/kbl/glk/cfl) indirect-context workaround batch. */
static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *cs)
{
	static const struct lri lri[] = {
		/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
		{
			COMMON_SLICE_CHICKEN2,
			__MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
				       0),
		},

		/* BSpec: 11391 */
		{
			FF_SLICE_CHICKEN,
			__MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
				       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
		},

		/* BSpec: 11299 */
		{
			_3D_CHICKEN3,
			__MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
				       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
		}
	};

	/* Keep arbitration disabled while the workarounds are applied */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
	cs = gen8_emit_flush_coherentl3_wa(engine, cs);

	/* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_FLUSH_L3 |
				    PIPE_CONTROL_STORE_DATA_INDEX |
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_QW_WRITE,
				    LRC_PPHWSP_SCRATCH_ADDR);

	cs = emit_lri(cs, lri, ARRAY_SIZE(lri));

	/* WaMediaPoolStateCmdInWABB:bxt,glk */
	if (HAS_POOLED_EU(engine->i915)) {
		/*
		 * EU pool configuration is setup along with golden context
		 * during context initialization. This value depends on
		 * device type (2x6 or 3x6) and needs to be updated based
		 * on which subslice is disabled especially for 2x6
		 * devices, however it is safe to load default
		 * configuration of 3x6 device instead of masking off
		 * corresponding bits because HW ignores bits of a disabled
		 * subslice and drops down to appropriate config. Please
		 * see render_state_setup() in i915_gem_render_state.c for
		 * possible configurations, to avoid duplication they are
		 * not shown here again.
		 */
		*cs++ = GEN9_MEDIA_POOL_STATE;
		*cs++ = GEN9_MEDIA_POOL_ENABLE;
		*cs++ = 0x00777000;	/* default 3x6 EU pool configuration */
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Pad to end of cacheline */
	while (!IS_ALIGNED((unsigned long)cs, CACHELINE_BYTES))
		*cs++ = MI_NOOP;

	return cs;
}
1716c349dbc7Sjsg 
17175ca02815Sjsg #define CTX_WA_BB_SIZE (PAGE_SIZE)
1718c349dbc7Sjsg 
lrc_create_wa_ctx(struct intel_engine_cs * engine)17195ca02815Sjsg static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1720c349dbc7Sjsg {
1721c349dbc7Sjsg 	struct drm_i915_gem_object *obj;
1722c349dbc7Sjsg 	struct i915_vma *vma;
1723c349dbc7Sjsg 	int err;
1724c349dbc7Sjsg 
17255ca02815Sjsg 	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1726c349dbc7Sjsg 	if (IS_ERR(obj))
1727c349dbc7Sjsg 		return PTR_ERR(obj);
1728c349dbc7Sjsg 
1729c349dbc7Sjsg 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1730c349dbc7Sjsg 	if (IS_ERR(vma)) {
1731c349dbc7Sjsg 		err = PTR_ERR(vma);
1732c349dbc7Sjsg 		goto err;
1733c349dbc7Sjsg 	}
1734c349dbc7Sjsg 
1735c349dbc7Sjsg 	engine->wa_ctx.vma = vma;
1736c349dbc7Sjsg 	return 0;
1737c349dbc7Sjsg 
1738c349dbc7Sjsg err:
1739c349dbc7Sjsg 	i915_gem_object_put(obj);
1740c349dbc7Sjsg 	return err;
1741c349dbc7Sjsg }
1742c349dbc7Sjsg 
/* Unpin and release the wa_ctx batch vma (pairs with lrc_init_wa_ctx()). */
void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
	i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}
1747c349dbc7Sjsg 
1748c349dbc7Sjsg typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1749c349dbc7Sjsg 
lrc_init_wa_ctx(struct intel_engine_cs * engine)17505ca02815Sjsg void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1751c349dbc7Sjsg {
1752c349dbc7Sjsg 	struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
17535ca02815Sjsg 	struct i915_wa_ctx_bb *wa_bb[] = {
17545ca02815Sjsg 		&wa_ctx->indirect_ctx, &wa_ctx->per_ctx
17555ca02815Sjsg 	};
17565ca02815Sjsg 	wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
17575ca02815Sjsg 	struct i915_gem_ww_ctx ww;
1758c349dbc7Sjsg 	void *batch, *batch_ptr;
1759c349dbc7Sjsg 	unsigned int i;
17605ca02815Sjsg 	int err;
1761c349dbc7Sjsg 
1762f005ef32Sjsg 	if (GRAPHICS_VER(engine->i915) >= 11 ||
1763f005ef32Sjsg 	    !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
17645ca02815Sjsg 		return;
1765c349dbc7Sjsg 
1766f005ef32Sjsg 	if (GRAPHICS_VER(engine->i915) == 9) {
1767c349dbc7Sjsg 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
1768c349dbc7Sjsg 		wa_bb_fn[1] = NULL;
1769f005ef32Sjsg 	} else if (GRAPHICS_VER(engine->i915) == 8) {
1770c349dbc7Sjsg 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
1771c349dbc7Sjsg 		wa_bb_fn[1] = NULL;
1772c349dbc7Sjsg 	}
1773c349dbc7Sjsg 
17745ca02815Sjsg 	err = lrc_create_wa_ctx(engine);
17755ca02815Sjsg 	if (err) {
17765ca02815Sjsg 		/*
17775ca02815Sjsg 		 * We continue even if we fail to initialize WA batch
17785ca02815Sjsg 		 * because we only expect rare glitches but nothing
17795ca02815Sjsg 		 * critical to prevent us from using GPU
17805ca02815Sjsg 		 */
17815ca02815Sjsg 		drm_err(&engine->i915->drm,
17825ca02815Sjsg 			"Ignoring context switch w/a allocation error:%d\n",
17835ca02815Sjsg 			err);
17845ca02815Sjsg 		return;
1785c349dbc7Sjsg 	}
1786c349dbc7Sjsg 
17875ca02815Sjsg 	if (!engine->wa_ctx.vma)
17885ca02815Sjsg 		return;
17895ca02815Sjsg 
17905ca02815Sjsg 	i915_gem_ww_ctx_init(&ww, true);
17915ca02815Sjsg retry:
17925ca02815Sjsg 	err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
17935ca02815Sjsg 	if (!err)
17945ca02815Sjsg 		err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
17955ca02815Sjsg 	if (err)
17965ca02815Sjsg 		goto err;
17975ca02815Sjsg 
1798ad8b1aafSjsg 	batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
17995ca02815Sjsg 	if (IS_ERR(batch)) {
18005ca02815Sjsg 		err = PTR_ERR(batch);
18015ca02815Sjsg 		goto err_unpin;
18025ca02815Sjsg 	}
1803c349dbc7Sjsg 
1804c349dbc7Sjsg 	/*
1805c349dbc7Sjsg 	 * Emit the two workaround batch buffers, recording the offset from the
1806c349dbc7Sjsg 	 * start of the workaround batch buffer object for each and their
1807c349dbc7Sjsg 	 * respective sizes.
1808c349dbc7Sjsg 	 */
1809ad8b1aafSjsg 	batch_ptr = batch;
1810c349dbc7Sjsg 	for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1811c349dbc7Sjsg 		wa_bb[i]->offset = batch_ptr - batch;
1812c349dbc7Sjsg 		if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1813c349dbc7Sjsg 						  CACHELINE_BYTES))) {
18145ca02815Sjsg 			err = -EINVAL;
1815c349dbc7Sjsg 			break;
1816c349dbc7Sjsg 		}
1817c349dbc7Sjsg 		if (wa_bb_fn[i])
1818c349dbc7Sjsg 			batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1819c349dbc7Sjsg 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1820c349dbc7Sjsg 	}
18215ca02815Sjsg 	GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1822c349dbc7Sjsg 
1823ad8b1aafSjsg 	__i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1824ad8b1aafSjsg 	__i915_gem_object_release_map(wa_ctx->vma->obj);
1825c349dbc7Sjsg 
18265ca02815Sjsg 	/* Verify that we can handle failure to setup the wa_ctx */
18275ca02815Sjsg 	if (!err)
18285ca02815Sjsg 		err = i915_inject_probe_error(engine->i915, -ENODEV);
18295ca02815Sjsg 
18305ca02815Sjsg err_unpin:
18315ca02815Sjsg 	if (err)
18325ca02815Sjsg 		i915_vma_unpin(wa_ctx->vma);
18335ca02815Sjsg err:
18345ca02815Sjsg 	if (err == -EDEADLK) {
18355ca02815Sjsg 		err = i915_gem_ww_ctx_backoff(&ww);
18365ca02815Sjsg 		if (!err)
18375ca02815Sjsg 			goto retry;
18385ca02815Sjsg 	}
18395ca02815Sjsg 	i915_gem_ww_ctx_fini(&ww);
18405ca02815Sjsg 
18415ca02815Sjsg 	if (err) {
18425ca02815Sjsg 		i915_vma_put(engine->wa_ctx.vma);
18435ca02815Sjsg 
18445ca02815Sjsg 		/* Clear all flags to prevent further use */
18455ca02815Sjsg 		memset(wa_ctx, 0, sizeof(*wa_ctx));
18465ca02815Sjsg 	}
1847c349dbc7Sjsg }
1848c349dbc7Sjsg 
/*
 * Selftest-only bookkeeping for context runtime underflows: count how many
 * times a negative delta was seen and track the largest magnitude observed.
 * Compiles to an empty function unless CONFIG_DRM_I915_SELFTEST is enabled.
 */
static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
	stats->runtime.num_underflow++;
	stats->runtime.max_underflow =
		max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}
1857c349dbc7Sjsg 
/* Read the raw 32b CTX_TIMESTAMP from the context image (may wrap). */
static u32 lrc_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}
18681bb76ff1Sjsg 
lrc_update_runtime(struct intel_context * ce)18695ca02815Sjsg void lrc_update_runtime(struct intel_context *ce)
1870c349dbc7Sjsg {
18711bb76ff1Sjsg 	struct intel_context_stats *stats = &ce->stats;
18725ca02815Sjsg 	u32 old;
18735ca02815Sjsg 	s32 dt;
1874c349dbc7Sjsg 
18751bb76ff1Sjsg 	old = stats->runtime.last;
18761bb76ff1Sjsg 	stats->runtime.last = lrc_get_runtime(ce);
18771bb76ff1Sjsg 	dt = stats->runtime.last - old;
18781bb76ff1Sjsg 	if (!dt)
1879c349dbc7Sjsg 		return;
1880c349dbc7Sjsg 
18815ca02815Sjsg 	if (unlikely(dt < 0)) {
18825ca02815Sjsg 		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
18831bb76ff1Sjsg 			 old, stats->runtime.last, dt);
18841bb76ff1Sjsg 		st_runtime_underflow(stats, dt);
18855ca02815Sjsg 		return;
1886c349dbc7Sjsg 	}
1887c349dbc7Sjsg 
18881bb76ff1Sjsg 	ewma_runtime_add(&stats->runtime.avg, dt);
18891bb76ff1Sjsg 	stats->runtime.total += dt;
1890c349dbc7Sjsg }
1891c349dbc7Sjsg 
1892c349dbc7Sjsg #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1893c349dbc7Sjsg #include "selftest_lrc.c"
1894c349dbc7Sjsg #endif
1895