// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or the number of registers to set values for in
 *        the case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes
 *      follow for the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
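/*
 * An illustrative example (not taken from any real table): the encoded
 * sequence
 *
 *   NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x2b4), END
 *
 * would decode into @regs as
 *
 *   regs[0]   untouched (skipped by the NOP)
 *   regs[1]   MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED
 *   regs[2]   engine->mmio_base + 0x034
 *   regs[3]   untouched (value slot, not filled here)
 *   regs[4]   engine->mmio_base + 0x2b4
 *   regs[5]   untouched (value slot, not filled here)
 *
 * On GRAPHICS_VER >= 11 the LRI dword additionally carries
 * MI_LRI_LRM_CS_MMIO, as set_offsets() below shows.
 */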
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct intel_engine_cs *engine,
			bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = engine->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		GEM_BUG_ON(!count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	if (close) {
		/* Close the batch; used mainly by live_lrc_layout() */
		*regs = MI_BATCH_BUFFER_END;
		if (GRAPHICS_VER(engine->i915) >= 11)
			*regs |= BIT(0);
	}
}
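
/*
 * Note: within this file set_offsets() serves two callers: __lrc_init_regs()
 * uses it to stamp the default register layout into a fresh context image,
 * and lrc_update_offsets() reuses it to refresh the per-engine register
 * offsets when a virtual-engine context moves between physical engines.
 */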

static const u8 gen8_xcs_offsets[] = {
	NOP(1),
	LRI(11, 0),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),

	NOP(9),
	LRI(9, 0),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(2, 0),
	REG16(0x200),
	REG(0x028),

	END
};

static const u8 gen9_xcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, POSTED),
	REG16(0x200),

	NOP(13),
	LRI(44, POSTED),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),

	END
};

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};

static const u8 gen8_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen9_rcs_offsets[] = {
	NOP(1),
	LRI(14, POSTED),
	REG16(0x244),
	REG(0x34),
	REG(0x30),
	REG(0x38),
	REG(0x3c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),

	NOP(3),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(13),
	LRI(1, 0),
	REG(0xc8),

	NOP(13),
	LRI(44, POSTED),
	REG(0x28),
	REG(0x9c),
	REG(0xc0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x68),

	END
};

static const u8 gen11_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x11c),
	REG(0x114),
	REG(0x118),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(1, POSTED),
	REG(0x1b0),

	NOP(10),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
	/*
	 * The gen12+ lists only have the registers we program in the basic
	 * default state. We rely on the context image using relative
	 * addressing to automatically fix up the register state between the
	 * physical engines for the virtual engine.
	 */
	GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
		   !intel_engine_has_relative_mmio(engine));

	if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
			return mtl_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_rcs_offsets;
		else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
			return xehp_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 11)
			return gen11_rcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_rcs_offsets;
		else
			return gen8_rcs_offsets;
	} else {
		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
			return dg2_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 12)
			return gen12_xcs_offsets;
		else if (GRAPHICS_VER(engine->i915) >= 9)
			return gen9_xcs_offsets;
		else
			return gen8_xcs_offsets;
	}
}

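/*
 * The lrc_ring_*() helpers below return the dword index, within the context
 * image, of a register's MI_LRI offset slot (the register's value lives at
 * that index + 1), or -1 if the register is not part of this engine's
 * context image.
 */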
static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x60;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x54;
	else if (engine->class == RENDER_CLASS)
		return 0x58;
	else
		return -1;
}

static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x80;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x70;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x64;
	else if (GRAPHICS_VER(engine->i915) >= 8 &&
		 engine->class == RENDER_CLASS)
		return 0xc4;
	else
		return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		return 0x84;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x74;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return 0x68;
	else if (engine->class == RENDER_CLASS)
		return 0xd8;
	else
		return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return 0x12;
	else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
		return 0x18;
	else
		return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_wa_bb_per_ctx(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_indirect_ptr(engine);
	if (x < 0)
		return x;

	return x + 2;
}

static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
		/*
		 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
		 * simply to match the RCS context image layout.
		 */
		return 0xc6;
	else if (engine->class != RENDER_CLASS)
		return -1;
	else if (GRAPHICS_VER(engine->i915) >= 12)
		return 0xb6;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return 0xaa;
	else
		return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER(engine->i915) >= 12)
		return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 11)
		return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 9)
		return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
	else if (GRAPHICS_VER(engine->i915) >= 8)
		return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;

	GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);

	return 0;
}

static void
lrc_setup_indirect_ctx(u32 *regs,
		       const struct intel_engine_cs *engine,
		       u32 ctx_bb_ggtt_addr,
		       u32 size)
{
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
	GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
	regs[lrc_ring_indirect_ptr(engine) + 1] =
		ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

	GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
	regs[lrc_ring_indirect_offset(engine) + 1] =
		lrc_ring_indirect_offset_default(engine) << 6;
}
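
/*
 * A sketch of the resulting state, with illustrative numbers only: given a
 * 128-byte indirect-context batch at GGTT address 0x10000, the INDIRECT_CTX
 * pointer dword above becomes 0x10000 | (128 / CACHELINE_BYTES), i.e.
 * 0x10000 | 0x2 - the batch size is carried in cachelines in the low bits
 * of the (cacheline-aligned) address.
 */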

static void init_common_regs(u32 * const regs,
			     const struct intel_context *ce,
			     const struct intel_engine_cs *engine,
			     bool inhibit)
{
	u32 ctl;
	int loc;

	ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
	ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
	if (inhibit)
		ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
	if (GRAPHICS_VER(engine->i915) < 11)
		ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
					   CTX_CTRL_RS_CTX_ENABLE);
	regs[CTX_CONTEXT_CONTROL] = ctl;

	regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

	loc = lrc_ring_bb_offset(engine);
	if (loc != -1)
		regs[loc + 1] = 0;
}

static void init_wa_bb_regs(u32 * const regs,
			    const struct intel_engine_cs *engine)
{
	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

	if (wa_ctx->per_ctx.size) {
		const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

		GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
		regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
			(ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
	}

	if (wa_ctx->indirect_ctx.size) {
		lrc_setup_indirect_ctx(regs, engine,
				       i915_ggtt_offset(wa_ctx->vma) +
				       wa_ctx->indirect_ctx.offset,
				       wa_ctx->indirect_ctx.size);
	}
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		/* 64b PPGTT (48bit canonical)
		 * PDP0_DESCRIPTOR contains the base address of the PML4;
		 * the other PDP descriptors are ignored.
		 */
		ASSIGN_CTX_PML4(ppgtt, regs);
	} else {
		ASSIGN_CTX_PDP(ppgtt, regs, 3);
		ASSIGN_CTX_PDP(ppgtt, regs, 2);
		ASSIGN_CTX_PDP(ppgtt, regs, 1);
		ASSIGN_CTX_PDP(ppgtt, regs, 0);
	}
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
	if (i915_is_ggtt(vm))
		return i915_vm_to_ggtt(vm)->alias;
	else
		return i915_vm_to_ppgtt(vm);
}

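/*
 * RING_MI_MODE is a masked register: the upper 16 bits of a write select
 * which of the lower 16 bits take effect. Clearing STOP_RING in the saved
 * image therefore needs the value bit cleared *and* the corresponding mask
 * bit (STOP_RING << 16) set, which is what __reset_stop_ring() does.
 */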
static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
	int x;

	x = lrc_ring_mi_mode(engine);
	if (x != -1) {
		regs[x + 1] &= ~STOP_RING;
		regs[x + 1] |= STOP_RING << 16;
	}
}

static void __lrc_init_regs(u32 *regs,
			    const struct intel_context *ce,
			    const struct intel_engine_cs *engine,
			    bool inhibit)
{
	/*
	 * A context is actually a big batch buffer with several
	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
	 * values we are setting here are only for the first context restore:
	 * on a subsequent save, the GPU will recreate this batchbuffer with new
	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
	 * we are not initializing here).
	 *
	 * Must keep consistent with virtual_update_register_offsets().
	 */

	if (inhibit)
		memset(regs, 0, PAGE_SIZE);

	set_offsets(regs, reg_offsets(engine), engine, inhibit);

	init_common_regs(regs, ce, engine, inhibit);
	init_ppgtt_regs(regs, vm_alias(ce->vm));

	init_wa_bb_regs(regs, engine);

	__reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
		   const struct intel_engine_cs *engine,
		   bool inhibit)
{
	__lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
		    const struct intel_engine_cs *engine)
{
	__reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return;

	vaddr += engine->context_size;

	if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
		drm_err_once(&engine->i915->drm,
			     "%s context redzone overwritten!\n",
			     engine->name);
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
	return PAGE_SIZE * ce->wa_bb_page;
}

static u32 *context_indirect_bb(const struct intel_context *ce)
{
	void *ptr;

	GEM_BUG_ON(!ce->wa_bb_page);

	ptr = ce->lrc_reg_state;
	ptr -= LRC_STATE_OFFSET; /* back to start of context image */
	ptr += context_wa_bb_offset(ce);

	return ptr;
}

void lrc_init_state(struct intel_context *ce,
		    struct intel_engine_cs *engine,
		    void *state)
{
	bool inhibit = true;

	set_redzone(state, engine);

	if (engine->default_state) {
#ifdef __linux__
		shmem_read(engine->default_state, 0,
			   state, engine->context_size);
#else
		uao_read(engine->default_state, 0,
			 state, engine->context_size);
#endif
		__set_bit(CONTEXT_VALID_BIT, &ce->flags);
		inhibit = false;
	}

	/* Clear the ppHWSP (inc. per-context counters) */
	memset(state, 0, PAGE_SIZE);

	/* Clear the indirect wa and storage */
	if (ce->wa_bb_page)
		memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	__lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

u32 lrc_indirect_bb(const struct intel_context *ce)
{
	return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
	/* If predication is active, this will be noop'ed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 0; /* No predication */

	/* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
	*cs++ = MI_BATCH_BUFFER_END | BIT(15);
	*cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

	/* Instructions are no longer predicated (disabled), we can proceed */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
	*cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;
	*cs++ = 1; /* enable predication before the next BB */

	*cs++ = MI_BATCH_BUFFER_END;
	GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

	return cs;
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 context_size;

	context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		context_size += I915_GTT_PAGE_SIZE; /* for redzone */

	if (GRAPHICS_VER(engine->i915) >= 12) {
		ce->wa_bb_page = context_size / PAGE_SIZE;
		context_size += PAGE_SIZE;
	}

	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj)) {
		obj = i915_gem_object_create_shmem(engine->i915, context_size);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		/*
		 * Wa_22016122933: For Media version 13.0, all Media GT shared
		 * memory needs to be mapped as WC on CPU side and UC (PAT
		 * index 2) on GPU side.
		 */
		if (intel_gt_needs_wa_22016122933(engine->gt))
			i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
	}

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
	struct intel_ring *ring;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(ce->state);

	vma = __lrc_alloc_state(ce, engine);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ring = intel_engine_create_ring(engine, ce->ring_size);
	if (IS_ERR(ring)) {
		err = PTR_ERR(ring);
		goto err_vma;
	}

	if (!page_mask_bits(ce->timeline)) {
		struct intel_timeline *tl;

		/*
		 * Use the static global HWSP for the kernel context, and
		 * a dynamically allocated cacheline for everyone else.
		 */
		if (unlikely(ce->timeline))
			tl = pinned_timeline(ce, engine);
		else
			tl = intel_timeline_create(engine->gt);
		if (IS_ERR(tl)) {
			err = PTR_ERR(tl);
			goto err_ring;
		}

		ce->timeline = tl;
	}

	ce->ring = ring;
	ce->state = vma;

	return 0;

err_ring:
	intel_ring_put(ring);
err_vma:
	i915_vma_put(vma);
	return err;
}

void lrc_reset(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_pinned(ce));

	intel_ring_reset(ce->ring, ce->ring->emit);

	/* Scrub away the garbage */
	lrc_init_regs(ce, ce->engine, true);
	ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
	    struct intel_engine_cs *engine,
	    struct i915_gem_ww_ctx *ww,
	    void **vaddr)
{
	GEM_BUG_ON(!ce->state);
	GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

	*vaddr = i915_gem_object_pin_map(ce->state->obj,
					 intel_gt_coherent_map_type(ce->engine->gt,
								    ce->state->obj,
								    false) |
					 I915_MAP_OVERRIDE);

	return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
	struct intel_engine_cs *engine,
	void *vaddr)
{
	ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

	if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
		lrc_init_state(ce, engine, vaddr);

	ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
	return 0;
}

void lrc_unpin(struct intel_context *ce)
{
	if (unlikely(ce->parallel.last_rq)) {
		i915_request_put(ce->parallel.last_rq);
		ce->parallel.last_rq = NULL;
	}
	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
		      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
	if (!ce->state)
		return;

	intel_ring_put(fetch_and_zero(&ce->ring));
	i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
	struct intel_context *ce = container_of(kref, typeof(*ce), ref);

	GEM_BUG_ON(!i915_active_is_idle(&ce->active));
	GEM_BUG_ON(intel_context_is_pinned(ce));

	lrc_fini(ce);

	intel_context_fini(ce);
	intel_context_free(ce);
}

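/*
 * The gen12_emit_*() helpers below build the per-context INDIRECT_CTX
 * workaround batch: gen12_emit_timestamp_wa() reloads the saved
 * CTX_TIMESTAMP from the context image into GPR0 and copies it back to
 * RING_CTX_TIMESTAMP, and gen12_emit_restore_scratch() then restores GPR0
 * itself from its saved slot in the image, so the workaround leaves the
 * context state unchanged.
 */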
static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		CTX_TIMESTAMP * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

	return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
	GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
		(lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
	*cs++ = 0;

	*cs++ = MI_LOAD_REGISTER_REG |
		MI_LRR_SOURCE_CS_MMIO |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
	*cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

	return cs;
}

/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF. However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly. All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
	*cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_cmd_buf_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

	cs = gen12_emit_aux_table_inv(ce->engine, cs);

	/* Wa_16014892111 */
	if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(ce->engine->i915))
		cs = dg2_emit_draw_watermark_setting(cs);

	return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
	cs = gen12_emit_timestamp_wa(ce, cs);
	cs = gen12_emit_restore_scratch(ce, cs);

	/* Wa_16013000631:dg2 */
	if (IS_DG2_G11(ce->engine->i915))
		if (ce->engine->class == COMPUTE_CLASS)
			cs = gen8_emit_pipe_control(cs,
						    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
						    0);

	return gen12_emit_aux_table_inv(ce->engine, cs);
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
		      const struct intel_engine_cs *engine,
		      u32 *(*emit)(const struct intel_context *, u32 *))
{
	u32 * const start = context_indirect_bb(ce);
	u32 *cs;

	cs = emit(ce, start);
	GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
	while ((unsigned long)cs % CACHELINE_BYTES)
		*cs++ = MI_NOOP;

	GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
	setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

	lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
			       lrc_indirect_bb(ce),
			       (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB:
 *
 * bits  0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
 * bits 12-31: LRCA, GTT address of (the HWSP of) this context
 * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
 * bits 53-54: mbz, reserved for use by hardware
 * bits 55-63: group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 * bits 32-36: reserved
 * bits 37-47: SW context ID
 * bits 48-53: engine instance
 * bit 54:     mbz, reserved for use by hardware
 * bits 55-60: SW counter
 * bits 61-63: engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 * bits 32-37: virtual function number
 * bit 38:     mbz, reserved for use by hardware
 * bits 39-54: SW context ID
 * bits 55-57: reserved
 * bits 58-63: SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
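/*
 * Note that lrc_descriptor() below computes only the lower dword of the
 * descriptor (GGTT address plus flags); the upper-dword context ID fields
 * described above are managed by the submission backends rather than here.
 */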
lrc_descriptor(const struct intel_context * ce)14345ca02815Sjsg static u32 lrc_descriptor(const struct intel_context *ce)
14355ca02815Sjsg {
14365ca02815Sjsg u32 desc;
14375ca02815Sjsg
14385ca02815Sjsg desc = INTEL_LEGACY_32B_CONTEXT;
14395ca02815Sjsg if (i915_vm_is_4lvl(ce->vm))
14405ca02815Sjsg desc = INTEL_LEGACY_64B_CONTEXT;
14415ca02815Sjsg desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
14425ca02815Sjsg
14435ca02815Sjsg desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
14445ca02815Sjsg if (GRAPHICS_VER(ce->vm->i915) == 8)
14455ca02815Sjsg desc |= GEN8_CTX_L3LLC_COHERENT;
14465ca02815Sjsg
14475ca02815Sjsg return i915_ggtt_offset(ce->state) | desc;
14485ca02815Sjsg }
14495ca02815Sjsg
lrc_update_regs(const struct intel_context * ce,const struct intel_engine_cs * engine,u32 head)14505ca02815Sjsg u32 lrc_update_regs(const struct intel_context *ce,
1451c349dbc7Sjsg const struct intel_engine_cs *engine,
1452c349dbc7Sjsg u32 head)
1453c349dbc7Sjsg {
1454c349dbc7Sjsg struct intel_ring *ring = ce->ring;
1455c349dbc7Sjsg u32 *regs = ce->lrc_reg_state;
1456c349dbc7Sjsg
1457c349dbc7Sjsg GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
1458c349dbc7Sjsg GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
1459c349dbc7Sjsg
1460c349dbc7Sjsg regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
1461c349dbc7Sjsg regs[CTX_RING_HEAD] = head;
1462c349dbc7Sjsg regs[CTX_RING_TAIL] = ring->tail;
1463c349dbc7Sjsg regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
1464c349dbc7Sjsg
1465c349dbc7Sjsg /* RPCS */
1466c349dbc7Sjsg if (engine->class == RENDER_CLASS) {
1467c349dbc7Sjsg regs[CTX_R_PWR_CLK_STATE] =
1468ad8b1aafSjsg intel_sseu_make_rpcs(engine->gt, &ce->sseu);
1469c349dbc7Sjsg
1470c349dbc7Sjsg i915_oa_init_reg_state(ce, engine);
1471c349dbc7Sjsg }
1472ad8b1aafSjsg
1473ad8b1aafSjsg if (ce->wa_bb_page) {
1474ad8b1aafSjsg u32 *(*fn)(const struct intel_context *ce, u32 *cs);
1475ad8b1aafSjsg
1476ad8b1aafSjsg fn = gen12_emit_indirect_ctx_xcs;
1477ad8b1aafSjsg if (ce->engine->class == RENDER_CLASS)
1478ad8b1aafSjsg fn = gen12_emit_indirect_ctx_rcs;
1479ad8b1aafSjsg
1480ad8b1aafSjsg /* Mutually exclusive wrt to global indirect bb */
1481ad8b1aafSjsg GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
1482ad8b1aafSjsg setup_indirect_ctx_bb(ce, engine, fn);
1483ad8b1aafSjsg }
14845ca02815Sjsg
14855ca02815Sjsg return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
1486c349dbc7Sjsg }
1487c349dbc7Sjsg
lrc_update_offsets(struct intel_context * ce,struct intel_engine_cs * engine)14885ca02815Sjsg void lrc_update_offsets(struct intel_context *ce,
14895ca02815Sjsg struct intel_engine_cs *engine)
1490c349dbc7Sjsg {
14915ca02815Sjsg set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
1492ad8b1aafSjsg }
1493ad8b1aafSjsg
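/*
 * Sanity check the ring registers held in the context image, repairing
 * anything that looks corrupted; @when names the submission phase quoted
 * in the warning at the end.
 */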
14945ca02815Sjsg void lrc_check_regs(const struct intel_context *ce,
14955ca02815Sjsg const struct intel_engine_cs *engine,
14965ca02815Sjsg const char *when)
1497ad8b1aafSjsg {
14985ca02815Sjsg const struct intel_ring *ring = ce->ring;
14995ca02815Sjsg u32 *regs = ce->lrc_reg_state;
15005ca02815Sjsg bool valid = true;
15015ca02815Sjsg int x;
1502c349dbc7Sjsg
15035ca02815Sjsg if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
15045ca02815Sjsg pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
15055ca02815Sjsg engine->name,
15065ca02815Sjsg regs[CTX_RING_START],
15075ca02815Sjsg i915_ggtt_offset(ring->vma));
15085ca02815Sjsg regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
15095ca02815Sjsg valid = false;
1510c349dbc7Sjsg }
1511c349dbc7Sjsg
15125ca02815Sjsg if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
15135ca02815Sjsg (RING_CTL_SIZE(ring->size) | RING_VALID)) {
15145ca02815Sjsg pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
15155ca02815Sjsg engine->name,
15165ca02815Sjsg regs[CTX_RING_CTL],
15175ca02815Sjsg (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
15185ca02815Sjsg regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
15195ca02815Sjsg valid = false;
1520c349dbc7Sjsg }
1521c349dbc7Sjsg
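	/*
	 * RING_MI_MODE is a masked register: its high 16 bits are write
	 * enables for the matching low bits, so STOP_RING is effectively
	 * set only when both the value bit and its enable bit are present,
	 * hence the (regs >> 16) & regs test below.
	 */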
15225ca02815Sjsg x = lrc_ring_mi_mode(engine);
15235ca02815Sjsg if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
15245ca02815Sjsg pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
15255ca02815Sjsg engine->name, regs[x + 1]);
15265ca02815Sjsg regs[x + 1] &= ~STOP_RING;
15275ca02815Sjsg regs[x + 1] |= STOP_RING << 16;
15285ca02815Sjsg valid = false;
1529c349dbc7Sjsg }
1530c349dbc7Sjsg
15315ca02815Sjsg WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
1532c349dbc7Sjsg }
1533c349dbc7Sjsg
1534c349dbc7Sjsg /*
1535c349dbc7Sjsg * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after the
1536c349dbc7Sjsg * PIPE_CONTROL instruction. This is required for the flush to happen
1537c349dbc7Sjsg * correctly, but there is a slight complication: the WA batch is only
1538c349dbc7Sjsg * initialized once, so we cannot read the register's value at the start
1539c349dbc7Sjsg * and reuse it later. Instead we save its value to memory, upload a
1540c349dbc7Sjsg * constant with bit21 set, and then restore the saved value afterwards.
1541c349dbc7Sjsg * To keep the WA simple, the constant is formed from the default value
1542c349dbc7Sjsg * of this register. This shouldn't be a problem because we only modify
1543c349dbc7Sjsg * it for a short period and the batch is non-preemptible. We could of
1544c349dbc7Sjsg * course use additional instructions that read the register's actual
1545c349dbc7Sjsg * value at that time and set our bit of interest, but that complicates the WA.
1546c349dbc7Sjsg *
1547c349dbc7Sjsg * This WA is also required for Gen9, so extracting it into a function
1548c349dbc7Sjsg * avoids code duplication.
1549c349dbc7Sjsg */
1550c349dbc7Sjsg static u32 *
1551c349dbc7Sjsg gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
1552c349dbc7Sjsg {
1553c349dbc7Sjsg /* NB no one else is allowed to scribble over scratch + 256! */
1554c349dbc7Sjsg *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1555c349dbc7Sjsg *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1556c349dbc7Sjsg *batch++ = intel_gt_scratch_offset(engine->gt,
1557c349dbc7Sjsg INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1558c349dbc7Sjsg *batch++ = 0;
1559c349dbc7Sjsg
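	/*
	 * Upload the constant: the register's default value (see the note
	 * above this function) with bit21, GEN8_LQSC_FLUSH_COHERENT_LINES, set.
	 */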
1560c349dbc7Sjsg *batch++ = MI_LOAD_REGISTER_IMM(1);
1561c349dbc7Sjsg *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1562c349dbc7Sjsg *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
1563c349dbc7Sjsg
1564c349dbc7Sjsg batch = gen8_emit_pipe_control(batch,
1565c349dbc7Sjsg PIPE_CONTROL_CS_STALL |
1566c349dbc7Sjsg PIPE_CONTROL_DC_FLUSH_ENABLE,
1567c349dbc7Sjsg 0);
1568c349dbc7Sjsg
1569c349dbc7Sjsg *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
1570c349dbc7Sjsg *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
1571c349dbc7Sjsg *batch++ = intel_gt_scratch_offset(engine->gt,
1572c349dbc7Sjsg INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
1573c349dbc7Sjsg *batch++ = 0;
1574c349dbc7Sjsg
1575c349dbc7Sjsg return batch;
1576c349dbc7Sjsg }
1577c349dbc7Sjsg
1578c349dbc7Sjsg /*
1579c349dbc7Sjsg * Typically we have only one indirect_ctx and one per_ctx batch buffer,
1580c349dbc7Sjsg * initialized once and shared across all contexts, but this field lets us
1581c349dbc7Sjsg * keep multiple batches at different offsets and select between them based
1582c349dbc7Sjsg * on some criterion. At the moment this batch always starts at the
1583c349dbc7Sjsg * beginning of the page and we don't have multiple wa_ctx batch buffers.
1584c349dbc7Sjsg *
1585c349dbc7Sjsg * The number of WAs applied is not known up front; we use this field
1586c349dbc7Sjsg * to return the number of DWORDs written.
1587c349dbc7Sjsg *
1588c349dbc7Sjsg * Note that this batch does not contain MI_BATCH_BUFFER_END, so it is
1589c349dbc7Sjsg * padded with NOOPs to the next cacheline boundary.
1590c349dbc7Sjsg * MI_BATCH_BUFFER_END is added to the per_ctx batch, and the two together
1591c349dbc7Sjsg * make a complete batch buffer.
1592c349dbc7Sjsg */
1593c349dbc7Sjsg static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1594c349dbc7Sjsg {
1595c349dbc7Sjsg /* WaDisableCtxRestoreArbitration:bdw,chv */
1596c349dbc7Sjsg *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1597c349dbc7Sjsg
1598c349dbc7Sjsg /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1599c349dbc7Sjsg if (IS_BROADWELL(engine->i915))
1600c349dbc7Sjsg batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1601c349dbc7Sjsg
1602c349dbc7Sjsg /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1603c349dbc7Sjsg /* Actual scratch location is at 128 bytes offset */
1604c349dbc7Sjsg batch = gen8_emit_pipe_control(batch,
1605c349dbc7Sjsg PIPE_CONTROL_FLUSH_L3 |
1606c349dbc7Sjsg PIPE_CONTROL_STORE_DATA_INDEX |
1607c349dbc7Sjsg PIPE_CONTROL_CS_STALL |
1608c349dbc7Sjsg PIPE_CONTROL_QW_WRITE,
1609c349dbc7Sjsg LRC_PPHWSP_SCRATCH_ADDR);
1610c349dbc7Sjsg
1611c349dbc7Sjsg *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1612c349dbc7Sjsg
1613c349dbc7Sjsg /* Pad to end of cacheline */
1614c349dbc7Sjsg while ((unsigned long)batch % CACHELINE_BYTES)
1615c349dbc7Sjsg *batch++ = MI_NOOP;
1616c349dbc7Sjsg
1617c349dbc7Sjsg /*
1618c349dbc7Sjsg * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1619c349dbc7Sjsg * execution depends on the length specified in terms of cache lines
1620c349dbc7Sjsg * in the register CTX_RCS_INDIRECT_CTX
1621c349dbc7Sjsg */
1622c349dbc7Sjsg
1623c349dbc7Sjsg return batch;
1624c349dbc7Sjsg }
1625c349dbc7Sjsg
1626c349dbc7Sjsg struct lri {
1627c349dbc7Sjsg i915_reg_t reg;
1628c349dbc7Sjsg u32 value;
1629c349dbc7Sjsg };
1630c349dbc7Sjsg
1631c349dbc7Sjsg static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
1632c349dbc7Sjsg {
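	/* A single MI_LOAD_REGISTER_IMM can load at most 63 registers */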
1633c349dbc7Sjsg GEM_BUG_ON(!count || count > 63);
1634c349dbc7Sjsg
1635c349dbc7Sjsg *batch++ = MI_LOAD_REGISTER_IMM(count);
1636c349dbc7Sjsg do {
1637c349dbc7Sjsg *batch++ = i915_mmio_reg_offset(lri->reg);
1638c349dbc7Sjsg *batch++ = lri->value;
1639c349dbc7Sjsg } while (lri++, --count);
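	/* The LRI header plus count pairs is an odd number of dwords; the
	 * trailing NOOP rounds the sequence to a qword boundary.
	 */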
1640c349dbc7Sjsg *batch++ = MI_NOOP;
1641c349dbc7Sjsg
1642c349dbc7Sjsg return batch;
1643c349dbc7Sjsg }
1644c349dbc7Sjsg
1645c349dbc7Sjsg static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
1646c349dbc7Sjsg {
1647c349dbc7Sjsg static const struct lri lri[] = {
1648c349dbc7Sjsg /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
1649c349dbc7Sjsg {
1650c349dbc7Sjsg COMMON_SLICE_CHICKEN2,
1651c349dbc7Sjsg __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
1652c349dbc7Sjsg 0),
1653c349dbc7Sjsg },
1654c349dbc7Sjsg
1655c349dbc7Sjsg /* BSpec: 11391 */
1656c349dbc7Sjsg {
1657c349dbc7Sjsg FF_SLICE_CHICKEN,
1658c349dbc7Sjsg __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
1659c349dbc7Sjsg FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
1660c349dbc7Sjsg },
1661c349dbc7Sjsg
1662c349dbc7Sjsg /* BSpec: 11299 */
1663c349dbc7Sjsg {
1664c349dbc7Sjsg _3D_CHICKEN3,
1665c349dbc7Sjsg __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
1666c349dbc7Sjsg _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
1667c349dbc7Sjsg }
1668c349dbc7Sjsg };
1669c349dbc7Sjsg
1670c349dbc7Sjsg *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
1671c349dbc7Sjsg
1672c349dbc7Sjsg /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
1673c349dbc7Sjsg batch = gen8_emit_flush_coherentl3_wa(engine, batch);
1674c349dbc7Sjsg
1675c349dbc7Sjsg /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
1676c349dbc7Sjsg batch = gen8_emit_pipe_control(batch,
1677c349dbc7Sjsg PIPE_CONTROL_FLUSH_L3 |
1678c349dbc7Sjsg PIPE_CONTROL_STORE_DATA_INDEX |
1679c349dbc7Sjsg PIPE_CONTROL_CS_STALL |
1680c349dbc7Sjsg PIPE_CONTROL_QW_WRITE,
1681c349dbc7Sjsg LRC_PPHWSP_SCRATCH_ADDR);
1682c349dbc7Sjsg
1683c349dbc7Sjsg batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
1684c349dbc7Sjsg
1685c349dbc7Sjsg /* WaMediaPoolStateCmdInWABB:bxt,glk */
1686c349dbc7Sjsg if (HAS_POOLED_EU(engine->i915)) {
1687c349dbc7Sjsg /*
1688c349dbc7Sjsg * EU pool configuration is set up along with the golden
1689c349dbc7Sjsg * context during context initialization. The value depends
1690c349dbc7Sjsg * on the device type (2x6 or 3x6) and would need updating
1691c349dbc7Sjsg * based on which subslice is disabled, especially for 2x6
1692c349dbc7Sjsg * devices. However, it is safe to load the default 3x6
1693c349dbc7Sjsg * configuration instead of masking off the corresponding
1694c349dbc7Sjsg * bits, because the HW ignores the bits of a disabled
1695c349dbc7Sjsg * subslice and drops down to the appropriate config. See
1696c349dbc7Sjsg * render_state_setup() in i915_gem_render_state.c for the
1697c349dbc7Sjsg * possible configurations; to avoid duplication they are
1698c349dbc7Sjsg * not shown here again.
1699c349dbc7Sjsg */
1700c349dbc7Sjsg *batch++ = GEN9_MEDIA_POOL_STATE;
1701c349dbc7Sjsg *batch++ = GEN9_MEDIA_POOL_ENABLE;
1702c349dbc7Sjsg *batch++ = 0x00777000;
1703c349dbc7Sjsg *batch++ = 0;
1704c349dbc7Sjsg *batch++ = 0;
1705c349dbc7Sjsg *batch++ = 0;
1706c349dbc7Sjsg }
1707c349dbc7Sjsg
1708c349dbc7Sjsg *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
1709c349dbc7Sjsg
1710c349dbc7Sjsg /* Pad to end of cacheline */
1711c349dbc7Sjsg while ((unsigned long)batch % CACHELINE_BYTES)
1712c349dbc7Sjsg *batch++ = MI_NOOP;
1713c349dbc7Sjsg
1714c349dbc7Sjsg return batch;
1715c349dbc7Sjsg }
1716c349dbc7Sjsg
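/* A single page backs both the indirect_ctx and per_ctx wa batches */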
17175ca02815Sjsg #define CTX_WA_BB_SIZE (PAGE_SIZE)
1718c349dbc7Sjsg
17195ca02815Sjsg static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
1720c349dbc7Sjsg {
1721c349dbc7Sjsg struct drm_i915_gem_object *obj;
1722c349dbc7Sjsg struct i915_vma *vma;
1723c349dbc7Sjsg int err;
1724c349dbc7Sjsg
17255ca02815Sjsg obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
1726c349dbc7Sjsg if (IS_ERR(obj))
1727c349dbc7Sjsg return PTR_ERR(obj);
1728c349dbc7Sjsg
1729c349dbc7Sjsg vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1730c349dbc7Sjsg if (IS_ERR(vma)) {
1731c349dbc7Sjsg err = PTR_ERR(vma);
1732c349dbc7Sjsg goto err;
1733c349dbc7Sjsg }
1734c349dbc7Sjsg
1735c349dbc7Sjsg engine->wa_ctx.vma = vma;
1736c349dbc7Sjsg return 0;
1737c349dbc7Sjsg
1738c349dbc7Sjsg err:
1739c349dbc7Sjsg i915_gem_object_put(obj);
1740c349dbc7Sjsg return err;
1741c349dbc7Sjsg }
1742c349dbc7Sjsg
17435ca02815Sjsg void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
1744c349dbc7Sjsg {
1745c349dbc7Sjsg i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
1746c349dbc7Sjsg }
1747c349dbc7Sjsg
1748c349dbc7Sjsg typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
1749c349dbc7Sjsg
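/*
 * Build the per-engine context-switch workaround batches. On success,
 * wa_ctx->indirect_ctx and wa_ctx->per_ctx each describe a cacheline-aligned
 * slice (offset + size) of the single shared wa_bb page; on any failure the
 * vma is dropped and wa_ctx is zeroed so nothing downstream tries to use it.
 */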
17505ca02815Sjsg void lrc_init_wa_ctx(struct intel_engine_cs *engine)
1751c349dbc7Sjsg {
1752c349dbc7Sjsg struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
17535ca02815Sjsg struct i915_wa_ctx_bb *wa_bb[] = {
17545ca02815Sjsg &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
17555ca02815Sjsg };
17565ca02815Sjsg wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
17575ca02815Sjsg struct i915_gem_ww_ctx ww;
1758c349dbc7Sjsg void *batch, *batch_ptr;
1759c349dbc7Sjsg unsigned int i;
17605ca02815Sjsg int err;
1761c349dbc7Sjsg
1762f005ef32Sjsg if (GRAPHICS_VER(engine->i915) >= 11 ||
1763f005ef32Sjsg !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
17645ca02815Sjsg return;
1765c349dbc7Sjsg
1766f005ef32Sjsg if (GRAPHICS_VER(engine->i915) == 9) {
1767c349dbc7Sjsg wa_bb_fn[0] = gen9_init_indirectctx_bb;
1768c349dbc7Sjsg wa_bb_fn[1] = NULL;
1769f005ef32Sjsg } else if (GRAPHICS_VER(engine->i915) == 8) {
1770c349dbc7Sjsg wa_bb_fn[0] = gen8_init_indirectctx_bb;
1771c349dbc7Sjsg wa_bb_fn[1] = NULL;
1772c349dbc7Sjsg }
1773c349dbc7Sjsg
17745ca02815Sjsg err = lrc_create_wa_ctx(engine);
17755ca02815Sjsg if (err) {
17765ca02815Sjsg /*
17775ca02815Sjsg * We continue even if we fail to initialize the WA batch:
17785ca02815Sjsg * we expect only rare glitches from the missing WA, nothing
17795ca02815Sjsg * critical enough to prevent us from using the GPU.
17805ca02815Sjsg */
17815ca02815Sjsg drm_err(&engine->i915->drm,
17825ca02815Sjsg "Ignoring context switch w/a allocation error:%d\n",
17835ca02815Sjsg err);
17845ca02815Sjsg return;
1785c349dbc7Sjsg }
1786c349dbc7Sjsg
17875ca02815Sjsg if (!engine->wa_ctx.vma)
17885ca02815Sjsg return;
17895ca02815Sjsg
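	/*
	 * Take the object lock under a ww transaction: if the lock is
	 * contended (-EDEADLK), we back off and loop via the retry label.
	 */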
17905ca02815Sjsg i915_gem_ww_ctx_init(&ww, true);
17915ca02815Sjsg retry:
17925ca02815Sjsg err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
17935ca02815Sjsg if (!err)
17945ca02815Sjsg err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
17955ca02815Sjsg if (err)
17965ca02815Sjsg goto err;
17975ca02815Sjsg
1798ad8b1aafSjsg batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
17995ca02815Sjsg if (IS_ERR(batch)) {
18005ca02815Sjsg err = PTR_ERR(batch);
18015ca02815Sjsg goto err_unpin;
18025ca02815Sjsg }
1803c349dbc7Sjsg
1804c349dbc7Sjsg /*
1805c349dbc7Sjsg * Emit the two workaround batch buffers, recording the offset from the
1806c349dbc7Sjsg * start of the workaround batch buffer object for each and their
1807c349dbc7Sjsg * respective sizes.
1808c349dbc7Sjsg */
1809ad8b1aafSjsg batch_ptr = batch;
1810c349dbc7Sjsg for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
1811c349dbc7Sjsg wa_bb[i]->offset = batch_ptr - batch;
1812c349dbc7Sjsg if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
1813c349dbc7Sjsg CACHELINE_BYTES))) {
18145ca02815Sjsg err = -EINVAL;
1815c349dbc7Sjsg break;
1816c349dbc7Sjsg }
1817c349dbc7Sjsg if (wa_bb_fn[i])
1818c349dbc7Sjsg batch_ptr = wa_bb_fn[i](engine, batch_ptr);
1819c349dbc7Sjsg wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
1820c349dbc7Sjsg }
18215ca02815Sjsg GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);
1822c349dbc7Sjsg
1823ad8b1aafSjsg __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
1824ad8b1aafSjsg __i915_gem_object_release_map(wa_ctx->vma->obj);
1825c349dbc7Sjsg
18265ca02815Sjsg /* Verify that we can handle failure to set up the wa_ctx */
18275ca02815Sjsg if (!err)
18285ca02815Sjsg err = i915_inject_probe_error(engine->i915, -ENODEV);
18295ca02815Sjsg
18305ca02815Sjsg err_unpin:
18315ca02815Sjsg if (err)
18325ca02815Sjsg i915_vma_unpin(wa_ctx->vma);
18335ca02815Sjsg err:
18345ca02815Sjsg if (err == -EDEADLK) {
18355ca02815Sjsg err = i915_gem_ww_ctx_backoff(&ww);
18365ca02815Sjsg if (!err)
18375ca02815Sjsg goto retry;
18385ca02815Sjsg }
18395ca02815Sjsg i915_gem_ww_ctx_fini(&ww);
18405ca02815Sjsg
18415ca02815Sjsg if (err) {
18425ca02815Sjsg i915_vma_put(engine->wa_ctx.vma);
18435ca02815Sjsg
18445ca02815Sjsg /* Clear all flags to prevent further use */
18455ca02815Sjsg memset(wa_ctx, 0, sizeof(*wa_ctx));
18465ca02815Sjsg }
1847c349dbc7Sjsg }
1848c349dbc7Sjsg
18491bb76ff1Sjsg static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
1850ad8b1aafSjsg {
18515ca02815Sjsg #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
18521bb76ff1Sjsg stats->runtime.num_underflow++;
18531bb76ff1Sjsg stats->runtime.max_underflow =
18541bb76ff1Sjsg max_t(u32, stats->runtime.max_underflow, -dt);
1855c349dbc7Sjsg #endif
1856c349dbc7Sjsg }
1857c349dbc7Sjsg
18581bb76ff1Sjsg static u32 lrc_get_runtime(const struct intel_context *ce)
18591bb76ff1Sjsg {
18601bb76ff1Sjsg /*
18611bb76ff1Sjsg * We can use either ppHWSP[16], which is recorded before the context
18621bb76ff1Sjsg * switch (and so excludes the cost of context switches), or the
18631bb76ff1Sjsg * value from the context image itself, which is saved/restored earlier
18641bb76ff1Sjsg * and so includes the cost of the save.
18651bb76ff1Sjsg */
18661bb76ff1Sjsg return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
18671bb76ff1Sjsg }
18681bb76ff1Sjsg
18695ca02815Sjsg void lrc_update_runtime(struct intel_context *ce)
1870c349dbc7Sjsg {
18711bb76ff1Sjsg struct intel_context_stats *stats = &ce->stats;
18725ca02815Sjsg u32 old;
18735ca02815Sjsg s32 dt;
1874c349dbc7Sjsg
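	/*
	 * The saved CTX_TIMESTAMP is a 32b value; unsigned subtraction of
	 * successive samples yields the elapsed ticks even across a wrap,
	 * and a negative s32 delta is reported as an underflow below.
	 */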
18751bb76ff1Sjsg old = stats->runtime.last;
18761bb76ff1Sjsg stats->runtime.last = lrc_get_runtime(ce);
18771bb76ff1Sjsg dt = stats->runtime.last - old;
18781bb76ff1Sjsg if (!dt)
1879c349dbc7Sjsg return;
1880c349dbc7Sjsg
18815ca02815Sjsg if (unlikely(dt < 0)) {
18825ca02815Sjsg CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
18831bb76ff1Sjsg old, stats->runtime.last, dt);
18841bb76ff1Sjsg st_runtime_underflow(stats, dt);
18855ca02815Sjsg return;
1886c349dbc7Sjsg }
1887c349dbc7Sjsg
18881bb76ff1Sjsg ewma_runtime_add(&stats->runtime.avg, dt);
18891bb76ff1Sjsg stats->runtime.total += dt;
1890c349dbc7Sjsg }
1891c349dbc7Sjsg
1892c349dbc7Sjsg #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1893c349dbc7Sjsg #include "selftest_lrc.c"
1894c349dbc7Sjsg #endif
1895