// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * Hardware workarounds are register programming documented to be executed in
 * the driver that falls outside of the normal programming sequences for a
 * platform. There are some basic categories of workarounds, depending on
 * how/when they are applied:
 *
 * - Context workarounds: workarounds that touch registers that are
 *   saved/restored to/from the HW context image. The list is emitted (via Load
 *   Register Immediate commands) once when initializing the device and saved
 *   in the default context. That default context is then used on every context
 *   creation to have a "primed golden context", i.e. a context image that
 *   already contains the changes needed to all the registers.
 *
 *   Context workarounds should be implemented in the \*_ctx_workarounds_init()
 *   variants respective to the targeted platforms.
 *
 * - Engine workarounds: the list of these WAs is applied whenever the specific
 *   engine is reset. It's also possible that a set of engine classes share a
 *   common power domain and they are reset together. This happens on some
 *   platforms with render and compute engines. In this case (at least) one of
 *   them needs to keep the workaround programming: the approach taken in the
 *   driver is to tie those workarounds to the first compute/render engine that
 *   is registered. When executing with GuC submission, engine resets are
 *   outside of kernel driver control, hence the list of registers involved is
 *   written once, on engine initialization, and then passed to GuC, which
 *   saves/restores their values before/after the reset takes place. See
 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
 *
 *   Workarounds for registers specific to RCS and CCS should be implemented in
 *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
 *   registers belonging to BCS, VCS or VECS should be implemented in
 *   xcs_engine_wa_init(). Workarounds for registers not belonging to a
 *   specific engine's MMIO range but that are part of the common RCS/CCS reset
 *   domain should be implemented in general_render_compute_wa_init(). The
 *   settings for CCS load balancing should be added in ccs_engine_wa_mode().
 *
 * - GT workarounds: the list of these WAs is applied whenever these registers
 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
 *
 *   GT workarounds should be implemented in the \*_gt_workarounds_init()
 *   variants respective to the targeted platforms.
 *
 * - Register whitelist: some workarounds need to be implemented in userspace,
 *   but need to touch privileged registers. The whitelist in the kernel
 *   instructs the hardware to allow the access to happen. From the kernel
 *   side, this is just a special case of a MMIO workaround (as we write the
 *   list of these to-be-whitelisted registers to some special HW registers).
 *
 *   Register whitelisting should be done in the \*_whitelist_build() variants
 *   respective to the targeted platforms.
 *
 * - Workaround batchbuffers: buffers that get executed automatically by the
 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always goes through
 *   those programming sequences when switching contexts. The support for
 *   workaround batchbuffers is enabled by these hardware mechanisms:
 *
 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
 *      context, pointing the hardware to jump to that location when that
 *      offset is reached in the context restore. Workaround batchbuffer in
 *      the driver currently uses this mechanism for all platforms.
 *
 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
 *      pointing the hardware to a buffer to continue executing after the
 *      engine registers are restored in a context restore sequence. This is
 *      currently not used in the driver.
 *
 * - Other: There are WAs that, due to their nature, cannot be applied from a
 *   central place. Those are peppered around the rest of the code, as needed.
 *   Workarounds related to the display IP are the main example.
 *
 * .. [1] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things, so it's the approach taken in the driver.
 */
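/*
 * As a concrete illustration of the context-workaround flow described above
 * (using a call that already exists later in this file): a workaround such as
 * Wa4x4STCOptimizationDisable is added from a *_ctx_workarounds_init()
 * variant with
 *
 *	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 *
 * and the accumulated list is later emitted as an MI_LOAD_REGISTER_IMM
 * sequence by intel_engine_emit_ctx_wa() when recording the golden context.
 */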

static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
			  const char *name, const char *engine_name)
{
	wal->gt = gt;
	wal->name = name;
	wal->engine_name = engine_name;
}

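/* Workaround lists grow and are trimmed in chunks of 16 entries. */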
#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
		wal->wa_count, wal->name, wal->engine_name);
}

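/*
 * Compute the union of forcewake domains required to read-modify-write
 * every register in the list.
 */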
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	struct drm_i915_private *i915 = wal->gt->i915;
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			drm_err(&i915->drm, "No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				drm_err(&i915->drm,
					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					i915_mmio_reg_offset(wa_->reg),
					wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

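	/* Keep the list sorted by mmio offset: bubble the new entry into place. */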
	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.reg = reg,
		.clr = clear,
		.set = set,
		.read = read_mask,
		.masked_reg = masked_reg,
	};

	_wa_add(wal, &wa);
}

static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.clr = clear,
		.set = set,
		.read = read_mask,
		.masked_reg = masked_reg,
		.is_mcr = 1,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear | set, false);
}

static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
	wa_mcr_add(wal, reg, clear, set, clear | set, false);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
{
	wa_mcr_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, set, set);
}

static void
wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
{
	wa_mcr_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_clr_set(wal, reg, clr, 0);
}

static void
wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
{
	wa_mcr_write_clr_set(wal, reg, clr, 0);
}

/*
 * WA operations on "masked registers". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without an rmw: you simply write in the upper 16
 * bits the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
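/*
 * For example, given the _MASKED_BIT_ENABLE() definition (the value is
 * duplicated into the mask half), enabling bit 5 of a masked register writes
 * 0x00200020: the upper half selects which bit is affected, the lower half
 * supplies its new value.
 */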

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
		    u32 mask, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

static void
wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
			u32 mask, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
		     HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
			 DOP_CLOCK_GATING_DISABLE);

	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
			 GEN8_SAMPLER_POWER_BYPASS_DIS);

	wa_masked_en(wal, HDC_CHICKEN0,
		     /* WaForceContextSaveRestoreNonCoherent:bdw */
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
		     (IS_BROADWELL_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 FLOW_CONTROL_ENABLE |
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
			 GEN9_ENABLE_YV12_BUGFIX |
			 GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	wa_masked_en(wal, CACHE_MODE_1,
		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915))
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
				 GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining the old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

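	/* e.g. a slice whose only 7-EU subslice is ss2 yields vals[i] = 3 - 2 = 1. */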
	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
	wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* WaEnableFloatBlendOptimization:icl */
	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
		   0 /* write-only, so skip validation */,
		   true);

	/* WaDisableGPGPUMidThreadPreemption:icl */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
			 0,
			 0xFFFFFFFF);

	/* Wa_1406306137:icl,ehl */
	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

/*
 * These settings aren't actually workarounds, but general tuning settings
 * that need to be programmed on the dg2 platform.
 */
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
	wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
			     FF_MODE2_TDS_TIMER_128);
}

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/*
	 * Wa_1409142259:tgl,dg1,adl-p
	 * Wa_1409347922:tgl,dg1,adl-p
	 * Wa_1409252684:tgl,dg1,adl-p
	 * Wa_1409217633:tgl,dg1,adl-p
	 * Wa_1409207793:tgl,dg1,adl-p
	 * Wa_1409178076:tgl,dg1,adl-p
	 * Wa_1408979724:tgl,dg1,adl-p
	 * Wa_14010443199:tgl,rkl,dg1,adl-p
	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p
	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p
	 */
	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/* WaDisableGPGPUMidThreadPreemption:gen12 */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/*
	 * Wa_16011163337 - GS_TIMER
	 *
	 * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
	 * need to program it even on those that don't explicitly list that
	 * workaround.
	 *
	 * Note that the programming of GEN12_FF_MODE2 is further modified
	 * according to the FF_MODE2 guidance given by Wa_1608008084.
	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
	 * value when read from the CPU.
	 *
	 * The default value for this register is zero for all fields.
	 * So instead of doing an RMW we should just write the desired values
	 * for TDS and GS timers. Note that since the readback can't be trusted,
	 * the clear mask is just set to ~0 to make sure other bits are not
	 * inadvertently set. For the same reason read verification is ignored.
	 */
	wa_add(wal,
	       GEN12_FF_MODE2,
	       ~0,
	       FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
	       0, false);

	if (!IS_DG1(i915)) {
		/* Wa_1806527549 */
		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);

		/* Wa_1606376872 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
	}
}

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/* Wa_1409044764 */
	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

	/* Wa_22010493298 */
	wa_masked_en(wal, HIZ_CHICKEN,
		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}

static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	dg2_ctx_gt_tuning_init(engine, wal);

	/* Wa_16013271637:dg2 */
	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

	/* Wa_14014947963:dg2 */
	wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);

	/* Wa_18018764978:dg2 */
	wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);

	/* Wa_15010599737:dg2 */
	wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);

	/* Wa_18019271663:dg2 */
	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);

	/* Wa_14019877138:dg2 */
	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}

static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;

	dg2_ctx_gt_tuning_init(engine, wal);

	/*
	 * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
	 * gen12_emit_indirect_ctx_rcs() rather than here on some early
	 * steppings.
	 */
	if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	      IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
		wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}

static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;

	xelpg_ctx_gt_tuning_init(engine, wal);

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
		/* Wa_14014947963 */
		wa_masked_field_set(wal, VF_PREEMPTION,
				    PREEMPTION_VERTEX_COUNT, 0x4000);

		/* Wa_16013271637 */
		wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
				 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

		/* Wa_18019627453 */
		wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);

		/* Wa_18018764978 */
		wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
	}

	/* Wa_18019271663 */
	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);

	/* Wa_14019877138 */
	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}

static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
					 struct i915_wa_list *wal)
{
	/*
	 * This is a "fake" workaround defined by software to ensure we
	 * maintain reliable, backward-compatible behavior for userspace with
	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
	 *
	 * The per-context setting of MI_MODE[12] determines whether the bits
	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
	 * in the traditional manner or whether they should instead use a new
	 * tgl+ meaning that breaks backward compatibility, but allows nesting
	 * into 3rd-level batchbuffers. When this new capability was first
	 * added in TGL, it remained off by default unless a context
	 * intentionally opted in to the new behavior. However Xe_HPG now
	 * flips this on by default and requires that we explicitly opt out if
	 * we don't want the new behavior.
	 *
	 * From a SW perspective, we want to maintain the backward-compatible
	 * behavior for userspace, so we'll apply a fake workaround to set it
	 * back to the legacy behavior on platforms where the hardware default
	 * is to break compatibility. At the moment there is no Linux
	 * userspace that utilizes third-level batchbuffers, so this will
	 * avoid userspace needing to make any changes; using the legacy
	 * meaning is the correct thing to do. If/when we have userspace
	 * consumers that want to utilize third-level batch nesting, we can
	 * provide a context parameter to allow them to opt in.
	 */
	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
}

static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	u8 mocs;

	/*
	 * Some blitter commands do not have a field for MOCS; those
	 * commands will use the MOCS index pointed to by BLIT_CCTL.
	 * The BLIT_CCTL registers need to be programmed as un-cached.
	 */
	if (engine->class == COPY_ENGINE_CLASS) {
		mocs = engine->gt->mocs.uc_index;
		wa_write_clr_set(wal,
				 BLIT_CCTL(engine->mmio_base),
				 BLIT_CCTL_MASK,
				 BLIT_CCTL_MOCS(mocs, mocs));
	}
}

/*
 * gen12_ctx_gt_fake_wa_init() isn't programming an official workaround
 * defined by the hardware team, but is programming general context registers.
 * Adding this context register programming to the context workaround list
 * allows us to use the wa framework for proper application and validation.
 */
static void
gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
			  struct i915_wa_list *wal)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
		fakewa_disable_nestedbb_mode(engine, wal);

	gen12_ctx_gt_mocs_init(engine, wal);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	wa_init_start(wal, engine->gt, name, engine->name);

	/* Applies to all engines */
	/*
	 * Fake workarounds are not actual workarounds but
	 * programming of context registers using the workaround framework.
	 */
	if (GRAPHICS_VER(i915) >= 12)
		gen12_ctx_gt_fake_wa_init(engine, wal);

	if (engine->class != RENDER_CLASS)
		goto done;

	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
		xelpg_ctx_workarounds_init(engine, wal);
	else if (IS_PONTEVECCHIO(i915))
		; /* noop; none at this time */
	else if (IS_DG2(i915))
		dg2_ctx_workarounds_init(engine, wal);
	else if (IS_XEHPSDV(i915))
		; /* noop; none at this time */
	else if (IS_DG1(i915))
		dg1_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 7)
		gen7_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 6)
		gen6_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) < 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

done:
	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct intel_uncore *uncore = rq->engine->uncore;
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

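	/* One dword for the LRI header, two per workaround, one trailing NOOP. */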
	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	fw = wal_get_fw_for_rmw(uncore, wal);

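	/*
	 * Hold the MCR steering lock and forcewake across the raw (_fw)
	 * reads below, so the read-modify-write values are computed from
	 * stable, powered-up registers.
	 */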
	intel_gt_mcr_lock(wal->gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 val;

		/* Skip reading the register if it's not really needed */
		if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
			val = wa->set;
		} else {
			val = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);
			val &= ~wa->clr;
			val |= wa->set;
		}

		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = val;
	}
	*cs++ = MI_NOOP;

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(wal->gt, flags);

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen4_gt_workarounds_init(struct intel_gt *gt,
			 struct i915_wa_list *wal)
{
	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen4_gt_workarounds_init(gt, wal);

	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	g4x_gt_workarounds_init(gt, wal);

	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
}

static void
ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	wa_masked_dis(wal,
		      GEN7_COMMON_SLICE_CHICKEN1,
		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

	/* WaForceL3Serialization:ivb */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* WaForceL3Serialization:vlv */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* L3 caching of data atomics doesn't work -- disable it. */
	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

	wa_add(wal,
	       HSW_ROW_CHICKEN3, 0,
	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
	       0 /* XXX does this reg exist? */, true);

	/* WaVSRefCountFullforceMissDisable:hsw */
	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
	unsigned int slice, subslice;
	u32 mcr, mcr_mask;

	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);

	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
	 * Before any MMIO read into slice/subslice specific registers, the MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	slice = ffs(sseu->slice_mask) - 1;
	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
	GEM_BUG_ON(!subslice);
	subslice--;

	/*
	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
	 */
	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;

	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);

	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
	gen9_wa_init_mcr(i915, wal);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}
1172c349dbc7Sjsg
1173c349dbc7Sjsg static void
11741bb76ff1Sjsg skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1175c349dbc7Sjsg {
11761bb76ff1Sjsg gen9_gt_workarounds_init(gt, wal);
1177c349dbc7Sjsg
1178c349dbc7Sjsg /* WaDisableGafsUnitClkGating:skl */
1179c349dbc7Sjsg wa_write_or(wal,
1180c349dbc7Sjsg GEN7_UCGCTL4,
1181c349dbc7Sjsg GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1182c349dbc7Sjsg
1183c349dbc7Sjsg /* WaInPlaceDecompressionHang:skl */
1184f005ef32Sjsg if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
1185c349dbc7Sjsg wa_write_or(wal,
1186c349dbc7Sjsg GEN9_GAMT_ECO_REG_RW_IA,
1187c349dbc7Sjsg GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1188c349dbc7Sjsg }
1189c349dbc7Sjsg
1190c349dbc7Sjsg static void
11911bb76ff1Sjsg kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1192c349dbc7Sjsg {
11931bb76ff1Sjsg gen9_gt_workarounds_init(gt, wal);
1194c349dbc7Sjsg
1195c349dbc7Sjsg /* WaDisableDynamicCreditSharing:kbl */
1196f005ef32Sjsg if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
1197c349dbc7Sjsg wa_write_or(wal,
1198c349dbc7Sjsg GAMT_CHKN_BIT_REG,
1199c349dbc7Sjsg GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1200c349dbc7Sjsg
1201c349dbc7Sjsg /* WaDisableGafsUnitClkGating:kbl */
1202c349dbc7Sjsg wa_write_or(wal,
1203c349dbc7Sjsg GEN7_UCGCTL4,
1204c349dbc7Sjsg GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1205c349dbc7Sjsg
1206c349dbc7Sjsg /* WaInPlaceDecompressionHang:kbl */
1207c349dbc7Sjsg wa_write_or(wal,
1208c349dbc7Sjsg GEN9_GAMT_ECO_REG_RW_IA,
1209c349dbc7Sjsg GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1210c349dbc7Sjsg }
1211c349dbc7Sjsg
1212c349dbc7Sjsg static void
12131bb76ff1Sjsg glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1214c349dbc7Sjsg {
12151bb76ff1Sjsg gen9_gt_workarounds_init(gt, wal);
1216c349dbc7Sjsg }
1217c349dbc7Sjsg
1218c349dbc7Sjsg static void
12191bb76ff1Sjsg cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1220c349dbc7Sjsg {
12211bb76ff1Sjsg gen9_gt_workarounds_init(gt, wal);
1222c349dbc7Sjsg
1223c349dbc7Sjsg /* WaDisableGafsUnitClkGating:cfl */
1224c349dbc7Sjsg wa_write_or(wal,
1225c349dbc7Sjsg GEN7_UCGCTL4,
1226c349dbc7Sjsg GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1227c349dbc7Sjsg
1228c349dbc7Sjsg /* WaInPlaceDecompressionHang:cfl */
1229c349dbc7Sjsg wa_write_or(wal,
1230c349dbc7Sjsg GEN9_GAMT_ECO_REG_RW_IA,
1231c349dbc7Sjsg GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1232c349dbc7Sjsg }
1233c349dbc7Sjsg
12345ca02815Sjsg static void __set_mcr_steering(struct i915_wa_list *wal,
12355ca02815Sjsg i915_reg_t steering_reg,
12365ca02815Sjsg unsigned int slice, unsigned int subslice)
12375ca02815Sjsg {
12385ca02815Sjsg u32 mcr, mcr_mask;
12395ca02815Sjsg
12405ca02815Sjsg mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
12415ca02815Sjsg mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
12425ca02815Sjsg
12435ca02815Sjsg wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
12445ca02815Sjsg }
12455ca02815Sjsg
1246f005ef32Sjsg static void debug_dump_steering(struct intel_gt *gt)
12475ca02815Sjsg {
12481bb76ff1Sjsg struct drm_printer p = drm_debug_printer("MCR Steering:");
12495ca02815Sjsg
1250f005ef32Sjsg if (drm_debug_enabled(DRM_UT_DRIVER))
1251f005ef32Sjsg intel_gt_mcr_report_steering(&p, gt, false);
1252f005ef32Sjsg }
1253f005ef32Sjsg
1254f005ef32Sjsg static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
1255f005ef32Sjsg unsigned int slice, unsigned int subslice)
1256f005ef32Sjsg {
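	/*
	 * Record the default steering target so the rest of the driver
	 * knows which group/instance implicitly steered reads will hit.
	 */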
12575ca02815Sjsg __set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
12581bb76ff1Sjsg
12591bb76ff1Sjsg gt->default_steering.groupid = slice;
12601bb76ff1Sjsg gt->default_steering.instanceid = subslice;
12611bb76ff1Sjsg
1262f005ef32Sjsg debug_dump_steering(gt);
12635ca02815Sjsg }
12645ca02815Sjsg
1265c349dbc7Sjsg static void
12661bb76ff1Sjsg icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1267c349dbc7Sjsg {
12681bb76ff1Sjsg const struct sseu_dev_info *sseu = >->info.sseu;
12691bb76ff1Sjsg unsigned int subslice;
1270c349dbc7Sjsg
12711bb76ff1Sjsg GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
12725ca02815Sjsg GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
1273c349dbc7Sjsg
1274c349dbc7Sjsg /*
12755ca02815Sjsg * Although a platform may have subslices, we need to always steer
12765ca02815Sjsg * reads to the lowest instance that isn't fused off. When Render
12775ca02815Sjsg * Power Gating is enabled, grabbing forcewake will only power up a
12785ca02815Sjsg * single subslice (the "minconfig") if there isn't a real workload
12795ca02815Sjsg * that needs to be run; this means that if we steer register reads to
12805ca02815Sjsg * one of the higher subslices, we run the risk of reading back 0's or
12815ca02815Sjsg * random garbage.
1282c349dbc7Sjsg */
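	/* Unlike ffs(), __ffs() is 0-based, so the result is used directly. */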
12831bb76ff1Sjsg subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
1284c349dbc7Sjsg
12855ca02815Sjsg /*
12865ca02815Sjsg * If the subslice we picked above also steers us to a valid L3 bank,
12875ca02815Sjsg * then we can just rely on the default steering and won't need to
12885ca02815Sjsg * worry about explicitly re-steering L3BANK reads later.
12895ca02815Sjsg */
12901bb76ff1Sjsg if (gt->info.l3bank_mask & BIT(subslice))
12911bb76ff1Sjsg gt->steering_table[L3BANK] = NULL;
1292c349dbc7Sjsg
12931bb76ff1Sjsg __add_mcr_wa(gt, wal, 0, subslice);
1294c349dbc7Sjsg }
1295c349dbc7Sjsg
1296c349dbc7Sjsg static void
12975ca02815Sjsg xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1298c349dbc7Sjsg {
12995ca02815Sjsg const struct sseu_dev_info *sseu = >->info.sseu;
13005ca02815Sjsg unsigned long slice, subslice = 0, slice_mask = 0;
13015ca02815Sjsg u32 lncf_mask = 0;
13025ca02815Sjsg int i;
1303c349dbc7Sjsg
13045ca02815Sjsg /*
13055ca02815Sjsg * On Xe_HP the steering increases in complexity. There are now several
13065ca02815Sjsg * more units that require steering and we're not guaranteed to be able
13075ca02815Sjsg * to find a common setting for all of them. These are:
13085ca02815Sjsg * - GSLICE (fusable)
13095ca02815Sjsg * - DSS (sub-unit within gslice; fusable)
13105ca02815Sjsg * - L3 Bank (fusable)
13115ca02815Sjsg * - MSLICE (fusable)
13125ca02815Sjsg * - LNCF (sub-unit within mslice; always present if mslice is present)
13135ca02815Sjsg *
13145ca02815Sjsg * We'll do our default/implicit steering based on GSLICE (in the
13155ca02815Sjsg * sliceid field) and DSS (in the subsliceid field). If we can
13165ca02815Sjsg * find overlap between the valid MSLICE and/or LNCF values and
13175ca02815Sjsg * a suitable GSLICE, then we can just re-use the default value and
13185ca02815Sjsg * skip explicit steering at runtime.
13195ca02815Sjsg *
13205ca02815Sjsg * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
13215ca02815Sjsg * a valid sliceid value. DSS steering is the only type of steering
13225ca02815Sjsg * that utilizes the 'subsliceid' bits.
13235ca02815Sjsg *
13245ca02815Sjsg * Also note that, even though the steering domain is called "GSlice"
13255ca02815Sjsg * and it is encoded in the register using the gslice format, the spec
13265ca02815Sjsg * says that the combined (geometry | compute) fuse should be used to
13275ca02815Sjsg * select the steering.
13285ca02815Sjsg */
13295ca02815Sjsg
13305ca02815Sjsg /* Find the potential gslice candidates */
13311bb76ff1Sjsg slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
13321bb76ff1Sjsg GEN_DSS_PER_GSLICE);
13335ca02815Sjsg
13345ca02815Sjsg /*
13355ca02815Sjsg * Find the potential LNCF candidates. Any LNCF within a valid
13365ca02815Sjsg * mslice is fine.
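	 * Each mslice has two LNCF steering values, hence the two mask
	 * bits set per mslice below.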
13375ca02815Sjsg */
13385ca02815Sjsg for_each_set_bit(i, >->info.mslice_mask, GEN12_MAX_MSLICES)
13395ca02815Sjsg lncf_mask |= (0x3 << (i * 2));
13405ca02815Sjsg
13415ca02815Sjsg /*
13425ca02815Sjsg * Are there any sliceid values that work for both GSLICE and LNCF
13435ca02815Sjsg * steering?
13445ca02815Sjsg */
13455ca02815Sjsg if (slice_mask & lncf_mask) {
13465ca02815Sjsg slice_mask &= lncf_mask;
13475ca02815Sjsg gt->steering_table[LNCF] = NULL;
13485ca02815Sjsg }
13495ca02815Sjsg
13505ca02815Sjsg /* How about sliceid values that also work for MSLICE steering? */
13515ca02815Sjsg if (slice_mask & gt->info.mslice_mask) {
13525ca02815Sjsg slice_mask &= gt->info.mslice_mask;
13535ca02815Sjsg gt->steering_table[MSLICE] = NULL;
13545ca02815Sjsg }
13555ca02815Sjsg
1356f005ef32Sjsg if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
1357f005ef32Sjsg gt->steering_table[GAM] = NULL;
1358f005ef32Sjsg
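	/*
	 * Steer to the lowest surviving candidate gslice, and to the first
	 * present DSS within it (expressed as a gslice-relative index).
	 */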
13595ca02815Sjsg slice = __ffs(slice_mask);
13601bb76ff1Sjsg subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
13611bb76ff1Sjsg GEN_DSS_PER_GSLICE;
13625ca02815Sjsg
13631bb76ff1Sjsg __add_mcr_wa(gt, wal, slice, subslice);
13645ca02815Sjsg
13655ca02815Sjsg /*
13665ca02815Sjsg * SQIDI ranges are special because they use different steering
13675ca02815Sjsg * registers than everything else we work with. On XeHP SDV and
13685ca02815Sjsg * DG2-G10, any value in the steering registers will work fine since
13695ca02815Sjsg * all instances are present, but DG2-G11 only has SQIDI instances at
13705ca02815Sjsg * IDs 2 and 3, so we need to steer to one of those. For simplicity
13715ca02815Sjsg * we'll just steer to a hardcoded "2" since that value will work
13725ca02815Sjsg * everywhere.
13735ca02815Sjsg */
13745ca02815Sjsg __set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
13755ca02815Sjsg __set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
1376f005ef32Sjsg
1377f005ef32Sjsg /*
1378f005ef32Sjsg * On DG2, GAM registers have a dedicated steering control register
1379f005ef32Sjsg * and must always be programmed to a hardcoded groupid of "1."
1380f005ef32Sjsg */
1381f005ef32Sjsg if (IS_DG2(gt->i915))
1382f005ef32Sjsg __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
1383c349dbc7Sjsg }
1384c349dbc7Sjsg
1385c349dbc7Sjsg static void
13861bb76ff1Sjsg pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
1387c349dbc7Sjsg {
13881bb76ff1Sjsg unsigned int dss;
13891bb76ff1Sjsg
13901bb76ff1Sjsg /*
13911bb76ff1Sjsg * Setup implicit steering for COMPUTE and DSS ranges to the first
13921bb76ff1Sjsg * non-fused-off DSS. All other types of MCR registers will be
13931bb76ff1Sjsg * explicitly steered.
13941bb76ff1Sjsg */
13951bb76ff1Sjsg dss = intel_sseu_find_first_xehp_dss(>->info.sseu, 0, 0);
13961bb76ff1Sjsg __add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
13971bb76ff1Sjsg }
13981bb76ff1Sjsg
13991bb76ff1Sjsg static void
14001bb76ff1Sjsg icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
14011bb76ff1Sjsg {
14021bb76ff1Sjsg struct drm_i915_private *i915 = gt->i915;
14031bb76ff1Sjsg
14041bb76ff1Sjsg icl_wa_init_mcr(gt, wal);
1405c349dbc7Sjsg
1406c349dbc7Sjsg /* WaModifyGamTlbPartitioning:icl */
14075ca02815Sjsg wa_write_clr_set(wal,
1408c349dbc7Sjsg GEN11_GACB_PERF_CTRL,
1409c349dbc7Sjsg GEN11_HASH_CTRL_MASK,
1410c349dbc7Sjsg GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1411c349dbc7Sjsg
1412c349dbc7Sjsg /* Wa_1405766107:icl
1413c349dbc7Sjsg * Formerly known as WaCL2SFHalfMaxAlloc
1414c349dbc7Sjsg */
1415c349dbc7Sjsg wa_write_or(wal,
1416c349dbc7Sjsg GEN11_LSN_UNSLCVC,
1417c349dbc7Sjsg GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1418c349dbc7Sjsg GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1419c349dbc7Sjsg
1420c349dbc7Sjsg /* Wa_220166154:icl
1421c349dbc7Sjsg * Formerly known as WaDisCtxReload
1422c349dbc7Sjsg */
1423c349dbc7Sjsg wa_write_or(wal,
1424c349dbc7Sjsg GEN8_GAMW_ECO_DEV_RW_IA,
1425c349dbc7Sjsg GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
1426c349dbc7Sjsg
1427c349dbc7Sjsg /* Wa_1406463099:icl
1428c349dbc7Sjsg * Formerly known as WaGamTlbPendError
1429c349dbc7Sjsg */
1430c349dbc7Sjsg wa_write_or(wal,
1431c349dbc7Sjsg GAMT_CHKN_BIT_REG,
1432c349dbc7Sjsg GAMT_CHKN_DISABLE_L3_COH_PIPE);
1433c349dbc7Sjsg
14344f60c9b9Sjsg /*
14354f60c9b9Sjsg * Wa_1408615072:icl,ehl (vsunit)
14364f60c9b9Sjsg * Wa_1407596294:icl,ehl (hsunit)
14374f60c9b9Sjsg */
14384f60c9b9Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
14394f60c9b9Sjsg VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
14404f60c9b9Sjsg
14411bb76ff1Sjsg /* Wa_1407352427:icl,ehl */
14421bb76ff1Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
14431bb76ff1Sjsg PSDUNIT_CLKGATE_DIS);
14441bb76ff1Sjsg
14451bb76ff1Sjsg /* Wa_1406680159:icl,ehl */
1446f005ef32Sjsg wa_mcr_write_or(wal,
1447f005ef32Sjsg GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
14481bb76ff1Sjsg GWUNIT_CLKGATE_DIS);
14491bb76ff1Sjsg
1450ad8b1aafSjsg /* Wa_1607087056:icl,ehl,jsl */
1451ad8b1aafSjsg if (IS_ICELAKE(i915) ||
1452f005ef32Sjsg ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
1453f005ef32Sjsg IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
1454c349dbc7Sjsg wa_write_or(wal,
1455f005ef32Sjsg GEN11_SLICE_UNIT_LEVEL_CLKGATE,
1456c349dbc7Sjsg L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
14575ca02815Sjsg
14585ca02815Sjsg /*
14595ca02815Sjsg * This is not a documented workaround, but rather an optimization
14605ca02815Sjsg * to reduce sampler power.
14615ca02815Sjsg */
1462f005ef32Sjsg wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
14635ca02815Sjsg }
14645ca02815Sjsg
14655ca02815Sjsg /*
14665ca02815Sjsg * Though there are per-engine instances of these registers,
14675ca02815Sjsg * they retain their value through engine resets and should
14685ca02815Sjsg * only be provided on the GT workaround list rather than
14695ca02815Sjsg * the engine-specific workaround list.
14705ca02815Sjsg */
14715ca02815Sjsg static void
14721bb76ff1Sjsg wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
14735ca02815Sjsg {
14745ca02815Sjsg struct intel_engine_cs *engine;
14755ca02815Sjsg int id;
14765ca02815Sjsg
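	/* Only even-numbered video decode engines get this override. */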
14775ca02815Sjsg for_each_engine(engine, gt, id) {
14785ca02815Sjsg if (engine->class != VIDEO_DECODE_CLASS ||
14795ca02815Sjsg (engine->instance % 2))
14805ca02815Sjsg continue;
14815ca02815Sjsg
14825ca02815Sjsg wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
14835ca02815Sjsg IECPUNIT_CLKGATE_DIS);
1484c349dbc7Sjsg }
1485ad8b1aafSjsg }
1486ad8b1aafSjsg
1487ad8b1aafSjsg static void
14881bb76ff1Sjsg gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1489ad8b1aafSjsg {
14901bb76ff1Sjsg icl_wa_init_mcr(gt, wal);
14915ca02815Sjsg
14925ca02815Sjsg /* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
14931bb76ff1Sjsg wa_14011060649(gt, wal);
14945ca02815Sjsg
14955ca02815Sjsg /* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
1496f005ef32Sjsg wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
1497c349dbc7Sjsg
1498f005ef32Sjsg /*
1499f005ef32Sjsg * Wa_14015795083
1500f005ef32Sjsg *
1501f005ef32Sjsg * Firmware on some gen12 platforms locks the MISCCPCTL register,
1502f005ef32Sjsg * preventing i915 from modifying it for this workaround. Skip the
1503f005ef32Sjsg * readback verification for this workaround on debug builds; if the
1504f005ef32Sjsg * workaround doesn't stick due to firmware behavior, it's not an error
1505f005ef32Sjsg * that we want CI to flag.
1506f005ef32Sjsg */
1507f005ef32Sjsg wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
1508f005ef32Sjsg 0, 0, false);
15095ca02815Sjsg }
15105ca02815Sjsg
15115ca02815Sjsg static void
15121bb76ff1Sjsg dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
15135ca02815Sjsg {
15141bb76ff1Sjsg gen12_gt_workarounds_init(gt, wal);
15155ca02815Sjsg
15165ca02815Sjsg /* Wa_1409420604:dg1 */
1517f005ef32Sjsg wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
15185ca02815Sjsg CPSSUNIT_CLKGATE_DIS);
15195ca02815Sjsg
15205ca02815Sjsg /* Wa_1408615072:dg1 */
15215ca02815Sjsg /* Empirical testing shows this register is unaffected by engine reset. */
1522f005ef32Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
15235ca02815Sjsg }
15245ca02815Sjsg
15255ca02815Sjsg static void
15261bb76ff1Sjsg xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
15275ca02815Sjsg {
15281bb76ff1Sjsg struct drm_i915_private *i915 = gt->i915;
15291bb76ff1Sjsg
15301bb76ff1Sjsg xehp_init_mcr(gt, wal);
15311bb76ff1Sjsg
15321bb76ff1Sjsg /* Wa_1409757795:xehpsdv */
1533f005ef32Sjsg wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
1534f005ef32Sjsg
1535f005ef32Sjsg /* Wa_18011725039:xehpsdv */
1536f005ef32Sjsg if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
1537f005ef32Sjsg wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
1538f005ef32Sjsg wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
1539f005ef32Sjsg }
15401bb76ff1Sjsg
15411bb76ff1Sjsg /* Wa_16011155590:xehpsdv */
15421bb76ff1Sjsg if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
15431bb76ff1Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
15441bb76ff1Sjsg TSGUNIT_CLKGATE_DIS);
15451bb76ff1Sjsg
15461bb76ff1Sjsg /* Wa_14011780169:xehpsdv */
15471bb76ff1Sjsg if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
15481bb76ff1Sjsg wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
15491bb76ff1Sjsg GAMTLBVDBOX7_CLKGATE_DIS |
15501bb76ff1Sjsg GAMTLBVDBOX6_CLKGATE_DIS |
15511bb76ff1Sjsg GAMTLBVDBOX5_CLKGATE_DIS |
15521bb76ff1Sjsg GAMTLBVDBOX4_CLKGATE_DIS |
15531bb76ff1Sjsg GAMTLBVDBOX3_CLKGATE_DIS |
15541bb76ff1Sjsg GAMTLBVDBOX2_CLKGATE_DIS |
15551bb76ff1Sjsg GAMTLBVDBOX1_CLKGATE_DIS |
15561bb76ff1Sjsg GAMTLBVDBOX0_CLKGATE_DIS |
15571bb76ff1Sjsg GAMTLBKCR_CLKGATE_DIS |
15581bb76ff1Sjsg GAMTLBGUC_CLKGATE_DIS |
15591bb76ff1Sjsg GAMTLBBLT_CLKGATE_DIS);
15601bb76ff1Sjsg wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
15611bb76ff1Sjsg GAMTLBGFXA1_CLKGATE_DIS |
15621bb76ff1Sjsg GAMTLBCOMPA0_CLKGATE_DIS |
15631bb76ff1Sjsg GAMTLBCOMPA1_CLKGATE_DIS |
15641bb76ff1Sjsg GAMTLBCOMPB0_CLKGATE_DIS |
15651bb76ff1Sjsg GAMTLBCOMPB1_CLKGATE_DIS |
15661bb76ff1Sjsg GAMTLBCOMPC0_CLKGATE_DIS |
15671bb76ff1Sjsg GAMTLBCOMPC1_CLKGATE_DIS |
15681bb76ff1Sjsg GAMTLBCOMPD0_CLKGATE_DIS |
15691bb76ff1Sjsg GAMTLBCOMPD1_CLKGATE_DIS |
15701bb76ff1Sjsg GAMTLBMERT_CLKGATE_DIS |
15711bb76ff1Sjsg GAMTLBVEBOX3_CLKGATE_DIS |
15721bb76ff1Sjsg GAMTLBVEBOX2_CLKGATE_DIS |
15731bb76ff1Sjsg GAMTLBVEBOX1_CLKGATE_DIS |
15741bb76ff1Sjsg GAMTLBVEBOX0_CLKGATE_DIS);
15751bb76ff1Sjsg }
15761bb76ff1Sjsg
15771bb76ff1Sjsg /* Wa_16012725990:xehpsdv */
15781bb76ff1Sjsg if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
15791bb76ff1Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
15801bb76ff1Sjsg
15811bb76ff1Sjsg /* Wa_14011060649:xehpsdv */
15821bb76ff1Sjsg wa_14011060649(gt, wal);
1583f005ef32Sjsg
1584f005ef32Sjsg /* Wa_14012362059:xehpsdv */
1585f005ef32Sjsg wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
1586f005ef32Sjsg
1587f005ef32Sjsg /* Wa_14014368820:xehpsdv */
1588f005ef32Sjsg wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1589f005ef32Sjsg INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1590f005ef32Sjsg
1591f005ef32Sjsg /* Wa_14010670810:xehpsdv */
1592f005ef32Sjsg wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
1593c349dbc7Sjsg }
1594c349dbc7Sjsg
1595c349dbc7Sjsg static void
15961bb76ff1Sjsg dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1597c349dbc7Sjsg {
15981bb76ff1Sjsg xehp_init_mcr(gt, wal);
15991bb76ff1Sjsg
16001bb76ff1Sjsg /* Wa_14011060649:dg2 */
16011bb76ff1Sjsg wa_14011060649(gt, wal);
16021bb76ff1Sjsg
16031bb76ff1Sjsg if (IS_DG2_G10(gt->i915)) {
16041bb76ff1Sjsg /* Wa_22010523718:dg2 */
16051bb76ff1Sjsg wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
16061bb76ff1Sjsg CG3DDISCFEG_CLKGATE_DIS);
16071bb76ff1Sjsg
16081bb76ff1Sjsg /* Wa_14011006942:dg2 */
1609f005ef32Sjsg wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
16101bb76ff1Sjsg DSS_ROUTER_CLKGATE_DIS);
16111bb76ff1Sjsg }
16121bb76ff1Sjsg
16131bb76ff1Sjsg /* Wa_14014830051:dg2 */
1614f005ef32Sjsg wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
16151bb76ff1Sjsg
16161bb76ff1Sjsg /* Wa_14015795083 */
16171bb76ff1Sjsg wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1618f005ef32Sjsg
1619f005ef32Sjsg /* Wa_18018781329 */
1620f005ef32Sjsg wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1621f005ef32Sjsg wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1622f005ef32Sjsg wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1623f005ef32Sjsg wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1624f005ef32Sjsg
1625f005ef32Sjsg /* Wa_1509235366:dg2 */
1626f005ef32Sjsg wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
1627f005ef32Sjsg INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
1628f005ef32Sjsg
1629f005ef32Sjsg /* Wa_14010648519:dg2 */
1630f005ef32Sjsg wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
16311bb76ff1Sjsg }
16321bb76ff1Sjsg
16331bb76ff1Sjsg static void
16341bb76ff1Sjsg pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
16351bb76ff1Sjsg {
16361bb76ff1Sjsg pvc_init_mcr(gt, wal);
16371bb76ff1Sjsg
16381bb76ff1Sjsg /* Wa_14015795083 */
16391bb76ff1Sjsg wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1640f005ef32Sjsg
1641f005ef32Sjsg /* Wa_18018781329 */
1642f005ef32Sjsg wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1643f005ef32Sjsg wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1644f005ef32Sjsg wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
1645f005ef32Sjsg wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
1646f005ef32Sjsg
1647f005ef32Sjsg /* Wa_16016694945 */
1648f005ef32Sjsg wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
1649f005ef32Sjsg }
1650f005ef32Sjsg
1651f005ef32Sjsg static void
1652f005ef32Sjsg xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1653f005ef32Sjsg {
16546e5fdd49Sjsg /* Wa_14018575942 / Wa_18018781329 */
165585669ef8Sjsg wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
1656f005ef32Sjsg wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
1657f005ef32Sjsg
1658f005ef32Sjsg /* Wa_22016670082 */
1659f005ef32Sjsg wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
1660f005ef32Sjsg
1661596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
1662596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
1663f005ef32Sjsg /* Wa_14014830051 */
1664f005ef32Sjsg wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
1665f005ef32Sjsg
1666f005ef32Sjsg /* Wa_14015795083 */
1667f005ef32Sjsg wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
1668f005ef32Sjsg }
1669f005ef32Sjsg
1670f005ef32Sjsg /*
1671f005ef32Sjsg * Unlike older platforms, we no longer set up implicit steering here;
1672f005ef32Sjsg * all MCR accesses are explicitly steered.
1673f005ef32Sjsg */
1674f005ef32Sjsg debug_dump_steering(gt);
1675f005ef32Sjsg }
1676f005ef32Sjsg
1677f005ef32Sjsg static void
1678f005ef32Sjsg xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
1679f005ef32Sjsg {
1680f005ef32Sjsg /*
1681f005ef32Sjsg * Wa_14018778641
1682f005ef32Sjsg * Wa_18018781329
1683f005ef32Sjsg *
1684f005ef32Sjsg * Note that although these registers are MCR on the primary
1685f005ef32Sjsg * GT, the media GT's versions are regular singleton registers.
1686f005ef32Sjsg */
1687f005ef32Sjsg wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
1688f005ef32Sjsg
1689f005ef32Sjsg debug_dump_steering(gt);
1690f005ef32Sjsg }
1691f005ef32Sjsg
1692f005ef32Sjsg /*
1693f005ef32Sjsg * The bspec performance guide has recommended MMIO tuning settings. These
1694f005ef32Sjsg * aren't truly "workarounds" but we want to program them through the
1695f005ef32Sjsg * workaround infrastructure to make sure they're (re)applied at the proper
1696f005ef32Sjsg * times.
1697f005ef32Sjsg *
1698f005ef32Sjsg * The programming in this function is for settings that persist through
1699f005ef32Sjsg * engine resets and also are not part of any engine's register state context.
1700f005ef32Sjsg * I.e., settings that only need to be re-applied in the event of a full GT
1701f005ef32Sjsg * reset.
1702f005ef32Sjsg */
1703f005ef32Sjsg static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
1704f005ef32Sjsg {
17056e5fdd49Sjsg if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
1706f005ef32Sjsg wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1707f005ef32Sjsg wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1708f005ef32Sjsg }
1709f005ef32Sjsg
1710f005ef32Sjsg if (IS_PONTEVECCHIO(gt->i915)) {
1711f005ef32Sjsg wa_mcr_write(wal, XEHPC_L3SCRUB,
1712f005ef32Sjsg SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
1713f005ef32Sjsg wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
1714f005ef32Sjsg }
1715f005ef32Sjsg
1716f005ef32Sjsg if (IS_DG2(gt->i915)) {
1717f005ef32Sjsg wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
1718f005ef32Sjsg wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
1719f005ef32Sjsg }
17201bb76ff1Sjsg }
17211bb76ff1Sjsg
17221bb76ff1Sjsg static void
17231bb76ff1Sjsg gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
17241bb76ff1Sjsg {
17251bb76ff1Sjsg struct drm_i915_private *i915 = gt->i915;
17261bb76ff1Sjsg
1727f005ef32Sjsg gt_tuning_settings(gt, wal);
1728f005ef32Sjsg
1729f005ef32Sjsg if (gt->type == GT_MEDIA) {
1730f005ef32Sjsg if (MEDIA_VER(i915) >= 13)
1731f005ef32Sjsg xelpmp_gt_workarounds_init(gt, wal);
1732f005ef32Sjsg else
1733f005ef32Sjsg MISSING_CASE(MEDIA_VER(i915));
1734f005ef32Sjsg
1735f005ef32Sjsg return;
1736f005ef32Sjsg }
1737f005ef32Sjsg
17386e5fdd49Sjsg if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
1739f005ef32Sjsg xelpg_gt_workarounds_init(gt, wal);
1740f005ef32Sjsg else if (IS_PONTEVECCHIO(i915))
17411bb76ff1Sjsg pvc_gt_workarounds_init(gt, wal);
17421bb76ff1Sjsg else if (IS_DG2(i915))
17431bb76ff1Sjsg dg2_gt_workarounds_init(gt, wal);
17441bb76ff1Sjsg else if (IS_XEHPSDV(i915))
17451bb76ff1Sjsg xehpsdv_gt_workarounds_init(gt, wal);
17465ca02815Sjsg else if (IS_DG1(i915))
17471bb76ff1Sjsg dg1_gt_workarounds_init(gt, wal);
17485ca02815Sjsg else if (GRAPHICS_VER(i915) == 12)
17491bb76ff1Sjsg gen12_gt_workarounds_init(gt, wal);
17505ca02815Sjsg else if (GRAPHICS_VER(i915) == 11)
17511bb76ff1Sjsg icl_gt_workarounds_init(gt, wal);
1752ad8b1aafSjsg else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
17531bb76ff1Sjsg cfl_gt_workarounds_init(gt, wal);
1754c349dbc7Sjsg else if (IS_GEMINILAKE(i915))
17551bb76ff1Sjsg glk_gt_workarounds_init(gt, wal);
1756c349dbc7Sjsg else if (IS_KABYLAKE(i915))
17571bb76ff1Sjsg kbl_gt_workarounds_init(gt, wal);
1758c349dbc7Sjsg else if (IS_BROXTON(i915))
17591bb76ff1Sjsg gen9_gt_workarounds_init(gt, wal);
1760c349dbc7Sjsg else if (IS_SKYLAKE(i915))
17611bb76ff1Sjsg skl_gt_workarounds_init(gt, wal);
17622b49ea17Sjsg else if (IS_HASWELL(i915))
17631bb76ff1Sjsg hsw_gt_workarounds_init(gt, wal);
176444957862Sjsg else if (IS_VALLEYVIEW(i915))
17651bb76ff1Sjsg vlv_gt_workarounds_init(gt, wal);
17662f562361Sjsg else if (IS_IVYBRIDGE(i915))
17671bb76ff1Sjsg ivb_gt_workarounds_init(gt, wal);
17685ca02815Sjsg else if (GRAPHICS_VER(i915) == 6)
17691bb76ff1Sjsg snb_gt_workarounds_init(gt, wal);
17705ca02815Sjsg else if (GRAPHICS_VER(i915) == 5)
17711bb76ff1Sjsg ilk_gt_workarounds_init(gt, wal);
1772e2630e85Sjsg else if (IS_G4X(i915))
17731bb76ff1Sjsg g4x_gt_workarounds_init(gt, wal);
17745ca02815Sjsg else if (GRAPHICS_VER(i915) == 4)
17751bb76ff1Sjsg gen4_gt_workarounds_init(gt, wal);
17765ca02815Sjsg else if (GRAPHICS_VER(i915) <= 8)
17775ca02815Sjsg ;
1778c349dbc7Sjsg else
17795ca02815Sjsg MISSING_CASE(GRAPHICS_VER(i915));
1780c349dbc7Sjsg }
1781c349dbc7Sjsg
17821bb76ff1Sjsg void intel_gt_init_workarounds(struct intel_gt *gt)
1783c349dbc7Sjsg {
17841bb76ff1Sjsg struct i915_wa_list *wal = >->wa_list;
1785c349dbc7Sjsg
1786f005ef32Sjsg wa_init_start(wal, gt, "GT", "global");
17871bb76ff1Sjsg gt_init_workarounds(gt, wal);
1788c349dbc7Sjsg wa_init_finish(wal);
1789c349dbc7Sjsg }
1790c349dbc7Sjsg
1791c349dbc7Sjsg static bool
1792f005ef32Sjsg wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
1793f005ef32Sjsg const char *name, const char *from)
1794c349dbc7Sjsg {
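	/* Compare only the bits that wa->read marks as meaningful to read back. */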
1795c349dbc7Sjsg if ((cur ^ wa->set) & wa->read) {
1796f005ef32Sjsg drm_err(>->i915->drm,
1797f005ef32Sjsg "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
1798c349dbc7Sjsg name, from, i915_mmio_reg_offset(wa->reg),
17995ca02815Sjsg cur, cur & wa->read, wa->set & wa->read);
1800c349dbc7Sjsg
1801c349dbc7Sjsg return false;
1802c349dbc7Sjsg }
1803c349dbc7Sjsg
1804c349dbc7Sjsg return true;
1805c349dbc7Sjsg }
1806c349dbc7Sjsg
1807f005ef32Sjsg static void wa_list_apply(const struct i915_wa_list *wal)
1808c349dbc7Sjsg {
1809f005ef32Sjsg struct intel_gt *gt = wal->gt;
18105ca02815Sjsg struct intel_uncore *uncore = gt->uncore;
1811c349dbc7Sjsg enum forcewake_domains fw;
1812c349dbc7Sjsg unsigned long flags;
1813c349dbc7Sjsg struct i915_wa *wa;
1814c349dbc7Sjsg unsigned int i;
1815c349dbc7Sjsg
1816c349dbc7Sjsg if (!wal->count)
1817c349dbc7Sjsg return;
1818c349dbc7Sjsg
1819c349dbc7Sjsg fw = wal_get_fw_for_rmw(uncore, wal);
1820c349dbc7Sjsg
1821f005ef32Sjsg intel_gt_mcr_lock(gt, &flags);
1822f005ef32Sjsg spin_lock(&uncore->lock);
1823c349dbc7Sjsg intel_uncore_forcewake_get__locked(uncore, fw);
1824c349dbc7Sjsg
1825c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
18265ca02815Sjsg u32 val, old = 0;
18275ca02815Sjsg
18285ca02815Sjsg /* open-coded rmw due to steering */
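		/*
		 * MCR registers are read back through a single steered
		 * instance, but written via multicast so that every
		 * instance picks up the new value.
		 */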
1829f005ef32Sjsg if (wa->clr)
1830f005ef32Sjsg old = wa->is_mcr ?
1831f005ef32Sjsg intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1832f005ef32Sjsg intel_uncore_read_fw(uncore, wa->reg);
18335ca02815Sjsg val = (old & ~wa->clr) | wa->set;
1834f005ef32Sjsg if (val != old || !wa->clr) {
1835f005ef32Sjsg if (wa->is_mcr)
1836f005ef32Sjsg intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
1837f005ef32Sjsg else
18385ca02815Sjsg intel_uncore_write_fw(uncore, wa->reg, val);
1839f005ef32Sjsg }
18405ca02815Sjsg
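		/* On debug builds, read the value back and complain if it didn't stick. */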
1841f005ef32Sjsg if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
1842f005ef32Sjsg u32 val = wa->is_mcr ?
1843f005ef32Sjsg intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1844f005ef32Sjsg intel_uncore_read_fw(uncore, wa->reg);
1845f005ef32Sjsg
1846f005ef32Sjsg wa_verify(gt, wa, val, wal->name, "application");
1847f005ef32Sjsg }
1848c349dbc7Sjsg }
1849c349dbc7Sjsg
1850c349dbc7Sjsg intel_uncore_forcewake_put__locked(uncore, fw);
1851f005ef32Sjsg spin_unlock(&uncore->lock);
1852f005ef32Sjsg intel_gt_mcr_unlock(gt, flags);
1853c349dbc7Sjsg }
1854c349dbc7Sjsg
1855c349dbc7Sjsg void intel_gt_apply_workarounds(struct intel_gt *gt)
1856c349dbc7Sjsg {
1857f005ef32Sjsg wa_list_apply(>->wa_list);
1858c349dbc7Sjsg }
1859c349dbc7Sjsg
18605ca02815Sjsg static bool wa_list_verify(struct intel_gt *gt,
1861c349dbc7Sjsg const struct i915_wa_list *wal,
1862c349dbc7Sjsg const char *from)
1863c349dbc7Sjsg {
18645ca02815Sjsg struct intel_uncore *uncore = gt->uncore;
1865c349dbc7Sjsg struct i915_wa *wa;
18665ca02815Sjsg enum forcewake_domains fw;
18675ca02815Sjsg unsigned long flags;
1868c349dbc7Sjsg unsigned int i;
1869c349dbc7Sjsg bool ok = true;
1870c349dbc7Sjsg
18715ca02815Sjsg fw = wal_get_fw_for_rmw(uncore, wal);
18725ca02815Sjsg
1873f005ef32Sjsg intel_gt_mcr_lock(gt, &flags);
1874f005ef32Sjsg spin_lock(&uncore->lock);
18755ca02815Sjsg intel_uncore_forcewake_get__locked(uncore, fw);
18765ca02815Sjsg
1877c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1878f005ef32Sjsg ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
1879f005ef32Sjsg intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
1880f005ef32Sjsg intel_uncore_read_fw(uncore, wa->reg),
1881c349dbc7Sjsg wal->name, from);
1882c349dbc7Sjsg
18835ca02815Sjsg intel_uncore_forcewake_put__locked(uncore, fw);
1884f005ef32Sjsg spin_unlock(&uncore->lock);
1885f005ef32Sjsg intel_gt_mcr_unlock(gt, flags);
18865ca02815Sjsg
1887c349dbc7Sjsg return ok;
1888c349dbc7Sjsg }
1889c349dbc7Sjsg
1890c349dbc7Sjsg bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1891c349dbc7Sjsg {
18921bb76ff1Sjsg return wa_list_verify(gt, >->wa_list, from);
1893c349dbc7Sjsg }
1894c349dbc7Sjsg
18955ca02815Sjsg __maybe_unused
18965ca02815Sjsg static bool is_nonpriv_flags_valid(u32 flags)
1897c349dbc7Sjsg {
1898c349dbc7Sjsg /* Check only valid flag bits are set */
1899c349dbc7Sjsg if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1900c349dbc7Sjsg return false;
1901c349dbc7Sjsg
1902c349dbc7Sjsg /* NB: Only 3 out of 4 enum values are valid for access field */
1903c349dbc7Sjsg if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1904c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1905c349dbc7Sjsg return false;
1906c349dbc7Sjsg
1907c349dbc7Sjsg return true;
1908c349dbc7Sjsg }
1909c349dbc7Sjsg
1910c349dbc7Sjsg static void
1911c349dbc7Sjsg whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1912c349dbc7Sjsg {
1913c349dbc7Sjsg struct i915_wa wa = {
1914c349dbc7Sjsg .reg = reg
1915c349dbc7Sjsg };
1916c349dbc7Sjsg
1917c349dbc7Sjsg if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1918c349dbc7Sjsg return;
1919c349dbc7Sjsg
1920c349dbc7Sjsg if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1921c349dbc7Sjsg return;
1922c349dbc7Sjsg
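	/* The nonpriv flags are encoded in offset bits no real register address uses. */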
1923c349dbc7Sjsg wa.reg.reg |= flags;
1924c349dbc7Sjsg _wa_add(wal, &wa);
1925c349dbc7Sjsg }
1926c349dbc7Sjsg
1927c349dbc7Sjsg static void
1928f005ef32Sjsg whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
1929f005ef32Sjsg {
1930f005ef32Sjsg struct i915_wa wa = {
1931f005ef32Sjsg .mcr_reg = reg,
1932f005ef32Sjsg .is_mcr = 1,
1933f005ef32Sjsg };
1934f005ef32Sjsg
1935f005ef32Sjsg if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1936f005ef32Sjsg return;
1937f005ef32Sjsg
1938f005ef32Sjsg if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1939f005ef32Sjsg return;
1940f005ef32Sjsg
1941f005ef32Sjsg wa.mcr_reg.reg |= flags;
1942f005ef32Sjsg _wa_add(wal, &wa);
1943f005ef32Sjsg }
1944f005ef32Sjsg
1945f005ef32Sjsg static void
1946c349dbc7Sjsg whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1947c349dbc7Sjsg {
1948c349dbc7Sjsg whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1949c349dbc7Sjsg }
1950c349dbc7Sjsg
1951f005ef32Sjsg static void
1952f005ef32Sjsg whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
1953f005ef32Sjsg {
1954f005ef32Sjsg whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1955f005ef32Sjsg }
1956f005ef32Sjsg
1957c349dbc7Sjsg static void gen9_whitelist_build(struct i915_wa_list *w)
1958c349dbc7Sjsg {
1959c349dbc7Sjsg /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1960c349dbc7Sjsg whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1961c349dbc7Sjsg
1962c349dbc7Sjsg /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1963c349dbc7Sjsg whitelist_reg(w, GEN8_CS_CHICKEN1);
1964c349dbc7Sjsg
1965c349dbc7Sjsg /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1966c349dbc7Sjsg whitelist_reg(w, GEN8_HDC_CHICKEN1);
1967c349dbc7Sjsg
1968c349dbc7Sjsg /* WaSendPushConstantsFromMMIO:skl,bxt */
1969c349dbc7Sjsg whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1970c349dbc7Sjsg }
1971c349dbc7Sjsg
1972c349dbc7Sjsg static void skl_whitelist_build(struct intel_engine_cs *engine)
1973c349dbc7Sjsg {
1974c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
1975c349dbc7Sjsg
1976c349dbc7Sjsg if (engine->class != RENDER_CLASS)
1977c349dbc7Sjsg return;
1978c349dbc7Sjsg
1979c349dbc7Sjsg gen9_whitelist_build(w);
1980c349dbc7Sjsg
1981c349dbc7Sjsg /* WaDisableLSQCROPERFforOCL:skl */
1982f005ef32Sjsg whitelist_mcr_reg(w, GEN8_L3SQCREG4);
1983c349dbc7Sjsg }
1984c349dbc7Sjsg
1985c349dbc7Sjsg static void bxt_whitelist_build(struct intel_engine_cs *engine)
1986c349dbc7Sjsg {
1987c349dbc7Sjsg if (engine->class != RENDER_CLASS)
1988c349dbc7Sjsg return;
1989c349dbc7Sjsg
1990c349dbc7Sjsg gen9_whitelist_build(&engine->whitelist);
1991c349dbc7Sjsg }
1992c349dbc7Sjsg
1993c349dbc7Sjsg static void kbl_whitelist_build(struct intel_engine_cs *engine)
1994c349dbc7Sjsg {
1995c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
1996c349dbc7Sjsg
1997c349dbc7Sjsg if (engine->class != RENDER_CLASS)
1998c349dbc7Sjsg return;
1999c349dbc7Sjsg
2000c349dbc7Sjsg gen9_whitelist_build(w);
2001c349dbc7Sjsg
2002c349dbc7Sjsg /* WaDisableLSQCROPERFforOCL:kbl */
2003f005ef32Sjsg whitelist_mcr_reg(w, GEN8_L3SQCREG4);
2004c349dbc7Sjsg }
2005c349dbc7Sjsg
2006c349dbc7Sjsg static void glk_whitelist_build(struct intel_engine_cs *engine)
2007c349dbc7Sjsg {
2008c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
2009c349dbc7Sjsg
2010c349dbc7Sjsg if (engine->class != RENDER_CLASS)
2011c349dbc7Sjsg return;
2012c349dbc7Sjsg
2013c349dbc7Sjsg gen9_whitelist_build(w);
2014c349dbc7Sjsg
2015c349dbc7Sjsg /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2016c349dbc7Sjsg whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2017c349dbc7Sjsg }
2018c349dbc7Sjsg
2019c349dbc7Sjsg static void cfl_whitelist_build(struct intel_engine_cs *engine)
2020c349dbc7Sjsg {
2021c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
2022c349dbc7Sjsg
2023c349dbc7Sjsg if (engine->class != RENDER_CLASS)
2024c349dbc7Sjsg return;
2025c349dbc7Sjsg
2026c349dbc7Sjsg gen9_whitelist_build(w);
2027c349dbc7Sjsg
2028c349dbc7Sjsg /*
2029c349dbc7Sjsg * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
2030c349dbc7Sjsg *
2031c349dbc7Sjsg * This covers 4 registers which are next to one another:
2032c349dbc7Sjsg * - PS_INVOCATION_COUNT
2033c349dbc7Sjsg * - PS_INVOCATION_COUNT_UDW
2034c349dbc7Sjsg * - PS_DEPTH_COUNT
2035c349dbc7Sjsg * - PS_DEPTH_COUNT_UDW
2036c349dbc7Sjsg */
2037c349dbc7Sjsg whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2038c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD |
2039c349dbc7Sjsg RING_FORCE_TO_NONPRIV_RANGE_4);
2040c349dbc7Sjsg }
2041c349dbc7Sjsg
20421bb76ff1Sjsg static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
2043ad8b1aafSjsg {
2044ad8b1aafSjsg struct i915_wa_list *w = &engine->whitelist;
2045ad8b1aafSjsg
2046ad8b1aafSjsg if (engine->class != RENDER_CLASS)
2047ad8b1aafSjsg whitelist_reg_ext(w,
2048ad8b1aafSjsg RING_CTX_TIMESTAMP(engine->mmio_base),
2049ad8b1aafSjsg RING_FORCE_TO_NONPRIV_ACCESS_RD);
20501bb76ff1Sjsg }
20511bb76ff1Sjsg
20521bb76ff1Sjsg static void cml_whitelist_build(struct intel_engine_cs *engine)
20531bb76ff1Sjsg {
20541bb76ff1Sjsg allow_read_ctx_timestamp(engine);
2055ad8b1aafSjsg
2056ad8b1aafSjsg cfl_whitelist_build(engine);
2057ad8b1aafSjsg }
2058ad8b1aafSjsg
2059c349dbc7Sjsg static void icl_whitelist_build(struct intel_engine_cs *engine)
2060c349dbc7Sjsg {
2061c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
2062c349dbc7Sjsg
20631bb76ff1Sjsg allow_read_ctx_timestamp(engine);
20641bb76ff1Sjsg
2065c349dbc7Sjsg switch (engine->class) {
2066c349dbc7Sjsg case RENDER_CLASS:
2067c349dbc7Sjsg /* WaAllowUMDToModifyHalfSliceChicken7:icl */
2068f005ef32Sjsg whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
2069c349dbc7Sjsg
2070c349dbc7Sjsg /* WaAllowUMDToModifySamplerMode:icl */
2071f005ef32Sjsg whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
2072c349dbc7Sjsg
2073c349dbc7Sjsg /* WaEnableStateCacheRedirectToCS:icl */
2074c349dbc7Sjsg whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2075c349dbc7Sjsg
2076c349dbc7Sjsg /*
2077c349dbc7Sjsg * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
2078c349dbc7Sjsg *
2079c349dbc7Sjsg * This covers 4 registers which are next to one another:
2080c349dbc7Sjsg * - PS_INVOCATION_COUNT
2081c349dbc7Sjsg * - PS_INVOCATION_COUNT_UDW
2082c349dbc7Sjsg * - PS_DEPTH_COUNT
2083c349dbc7Sjsg * - PS_DEPTH_COUNT_UDW
2084c349dbc7Sjsg */
2085c349dbc7Sjsg whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2086c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD |
2087c349dbc7Sjsg RING_FORCE_TO_NONPRIV_RANGE_4);
2088c349dbc7Sjsg break;
2089c349dbc7Sjsg
2090c349dbc7Sjsg case VIDEO_DECODE_CLASS:
2091c349dbc7Sjsg /* hucStatusRegOffset */
2092c349dbc7Sjsg whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
2093c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD);
2094c349dbc7Sjsg /* hucUKernelHdrInfoRegOffset */
2095c349dbc7Sjsg whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
2096c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD);
2097c349dbc7Sjsg /* hucStatus2RegOffset */
2098c349dbc7Sjsg whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
2099c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD);
2100c349dbc7Sjsg break;
2101c349dbc7Sjsg
2102c349dbc7Sjsg default:
2103c349dbc7Sjsg break;
2104c349dbc7Sjsg }
2105c349dbc7Sjsg }
2106c349dbc7Sjsg
2107c349dbc7Sjsg static void tgl_whitelist_build(struct intel_engine_cs *engine)
2108c349dbc7Sjsg {
2109c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
2110c349dbc7Sjsg
21111bb76ff1Sjsg allow_read_ctx_timestamp(engine);
21121bb76ff1Sjsg
2113c349dbc7Sjsg switch (engine->class) {
2114c349dbc7Sjsg case RENDER_CLASS:
2115c349dbc7Sjsg /*
2116c349dbc7Sjsg * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
2117c349dbc7Sjsg * Wa_1408556865:tgl
2118c349dbc7Sjsg *
2119c349dbc7Sjsg * This covers 4 registers which are next to one another:
2120c349dbc7Sjsg * - PS_INVOCATION_COUNT
2121c349dbc7Sjsg * - PS_INVOCATION_COUNT_UDW
2122c349dbc7Sjsg * - PS_DEPTH_COUNT
2123c349dbc7Sjsg * - PS_DEPTH_COUNT_UDW
2124c349dbc7Sjsg */
2125c349dbc7Sjsg whitelist_reg_ext(w, PS_INVOCATION_COUNT,
2126c349dbc7Sjsg RING_FORCE_TO_NONPRIV_ACCESS_RD |
2127c349dbc7Sjsg RING_FORCE_TO_NONPRIV_RANGE_4);
2128c349dbc7Sjsg
21291bb76ff1Sjsg /*
21301bb76ff1Sjsg * Wa_1808121037:tgl
21311bb76ff1Sjsg * Wa_14012131227:dg1
21321bb76ff1Sjsg * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
21331bb76ff1Sjsg */
2134c349dbc7Sjsg whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
2135c349dbc7Sjsg
2136c349dbc7Sjsg /* Wa_1806527549:tgl */
2137c349dbc7Sjsg whitelist_reg(w, HIZ_CHICKEN);
2138f005ef32Sjsg
2139f005ef32Sjsg /* Required by recommended tuning setting (not a workaround) */
2140f005ef32Sjsg whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);
2141f005ef32Sjsg
2142c349dbc7Sjsg break;
2143c349dbc7Sjsg default:
2144c349dbc7Sjsg break;
2145c349dbc7Sjsg }
2146c349dbc7Sjsg }
2147c349dbc7Sjsg
21481bb76ff1Sjsg static void dg2_whitelist_build(struct intel_engine_cs *engine)
21491bb76ff1Sjsg {
21501bb76ff1Sjsg struct i915_wa_list *w = &engine->whitelist;
21511bb76ff1Sjsg
21521bb76ff1Sjsg switch (engine->class) {
21531bb76ff1Sjsg case RENDER_CLASS:
2154f005ef32Sjsg /* Required by recommended tuning setting (not a workaround) */
2155f005ef32Sjsg whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2156f005ef32Sjsg
21571bb76ff1Sjsg break;
21581bb76ff1Sjsg default:
21591bb76ff1Sjsg break;
21601bb76ff1Sjsg }
21611bb76ff1Sjsg }
21621bb76ff1Sjsg
21631bb76ff1Sjsg static void blacklist_trtt(struct intel_engine_cs *engine)
21641bb76ff1Sjsg {
21651bb76ff1Sjsg struct i915_wa_list *w = &engine->whitelist;
21661bb76ff1Sjsg
21671bb76ff1Sjsg /*
21681bb76ff1Sjsg * Prevent read/write access to [0x4400, 0x4600) which covers
21691bb76ff1Sjsg * the TRTT range across all engines. Note that normally userspace
21701bb76ff1Sjsg * cannot access the other engines' trtt control, but for simplicity
21711bb76ff1Sjsg * we cover the entire range on each engine.
21721bb76ff1Sjsg */
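	/* Each RANGE_64 entry spans 64 dwords (0x100 bytes), so two entries cover [0x4400, 0x4600). */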
21731bb76ff1Sjsg whitelist_reg_ext(w, _MMIO(0x4400),
21741bb76ff1Sjsg RING_FORCE_TO_NONPRIV_DENY |
21751bb76ff1Sjsg RING_FORCE_TO_NONPRIV_RANGE_64);
21761bb76ff1Sjsg whitelist_reg_ext(w, _MMIO(0x4500),
21771bb76ff1Sjsg RING_FORCE_TO_NONPRIV_DENY |
21781bb76ff1Sjsg RING_FORCE_TO_NONPRIV_RANGE_64);
21791bb76ff1Sjsg }
21801bb76ff1Sjsg
21811bb76ff1Sjsg static void pvc_whitelist_build(struct intel_engine_cs *engine)
21821bb76ff1Sjsg {
21831bb76ff1Sjsg /* Wa_16014440446:pvc */
21841bb76ff1Sjsg blacklist_trtt(engine);
21851bb76ff1Sjsg }
21861bb76ff1Sjsg
218713f2a72cSjsg static void xelpg_whitelist_build(struct intel_engine_cs *engine)
2188f005ef32Sjsg {
2189f005ef32Sjsg struct i915_wa_list *w = &engine->whitelist;
2190f005ef32Sjsg
2191f005ef32Sjsg switch (engine->class) {
2192f005ef32Sjsg case RENDER_CLASS:
2193f005ef32Sjsg /* Required by recommended tuning setting (not a workaround) */
2194f005ef32Sjsg whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
2195f005ef32Sjsg
2196f005ef32Sjsg break;
2197f005ef32Sjsg default:
2198f005ef32Sjsg break;
2199f005ef32Sjsg }
2200f005ef32Sjsg }
2201f005ef32Sjsg
2202c349dbc7Sjsg void intel_engine_init_whitelist(struct intel_engine_cs *engine)
2203c349dbc7Sjsg {
2204c349dbc7Sjsg struct drm_i915_private *i915 = engine->i915;
2205c349dbc7Sjsg struct i915_wa_list *w = &engine->whitelist;
2206c349dbc7Sjsg
2207f005ef32Sjsg wa_init_start(w, engine->gt, "whitelist", engine->name);
2208c349dbc7Sjsg
220913f2a72cSjsg if (engine->gt->type == GT_MEDIA)
221013f2a72cSjsg ; /* none yet */
22116e5fdd49Sjsg else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
221213f2a72cSjsg xelpg_whitelist_build(engine);
2213f005ef32Sjsg else if (IS_PONTEVECCHIO(i915))
22141bb76ff1Sjsg pvc_whitelist_build(engine);
22151bb76ff1Sjsg else if (IS_DG2(i915))
22161bb76ff1Sjsg dg2_whitelist_build(engine);
22171bb76ff1Sjsg else if (IS_XEHPSDV(i915))
2218f005ef32Sjsg ; /* none needed */
22195ca02815Sjsg else if (GRAPHICS_VER(i915) == 12)
2220c349dbc7Sjsg tgl_whitelist_build(engine);
22215ca02815Sjsg else if (GRAPHICS_VER(i915) == 11)
2222c349dbc7Sjsg icl_whitelist_build(engine);
2223ad8b1aafSjsg else if (IS_COMETLAKE(i915))
2224ad8b1aafSjsg cml_whitelist_build(engine);
2225c349dbc7Sjsg else if (IS_COFFEELAKE(i915))
2226c349dbc7Sjsg cfl_whitelist_build(engine);
2227c349dbc7Sjsg else if (IS_GEMINILAKE(i915))
2228c349dbc7Sjsg glk_whitelist_build(engine);
2229c349dbc7Sjsg else if (IS_KABYLAKE(i915))
2230c349dbc7Sjsg kbl_whitelist_build(engine);
2231c349dbc7Sjsg else if (IS_BROXTON(i915))
2232c349dbc7Sjsg bxt_whitelist_build(engine);
2233c349dbc7Sjsg else if (IS_SKYLAKE(i915))
2234c349dbc7Sjsg skl_whitelist_build(engine);
22355ca02815Sjsg else if (GRAPHICS_VER(i915) <= 8)
22365ca02815Sjsg ;
2237c349dbc7Sjsg else
22385ca02815Sjsg MISSING_CASE(GRAPHICS_VER(i915));
2239c349dbc7Sjsg
2240c349dbc7Sjsg wa_init_finish(w);
2241c349dbc7Sjsg }
2242c349dbc7Sjsg
2243c349dbc7Sjsg void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
2244c349dbc7Sjsg {
2245c349dbc7Sjsg const struct i915_wa_list *wal = &engine->whitelist;
2246c349dbc7Sjsg struct intel_uncore *uncore = engine->uncore;
2247c349dbc7Sjsg const u32 base = engine->mmio_base;
2248c349dbc7Sjsg struct i915_wa *wa;
2249c349dbc7Sjsg unsigned int i;
2250c349dbc7Sjsg
2251c349dbc7Sjsg if (!wal->count)
2252c349dbc7Sjsg return;
2253c349dbc7Sjsg
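	/* Each entry programs one FORCE_TO_NONPRIV slot with a register offset (plus encoded access flags). */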
2254c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
2255c349dbc7Sjsg intel_uncore_write(uncore,
2256c349dbc7Sjsg RING_FORCE_TO_NONPRIV(base, i),
2257c349dbc7Sjsg i915_mmio_reg_offset(wa->reg));
2258c349dbc7Sjsg
2259c349dbc7Sjsg /* And clear the rest just in case of garbage */
2260c349dbc7Sjsg for (; i < RING_MAX_NONPRIV_SLOTS; i++)
2261c349dbc7Sjsg intel_uncore_write(uncore,
2262c349dbc7Sjsg RING_FORCE_TO_NONPRIV(base, i),
2263c349dbc7Sjsg i915_mmio_reg_offset(RING_NOPID(base)));
2264c349dbc7Sjsg }
2265c349dbc7Sjsg
22661bb76ff1Sjsg /*
22671bb76ff1Sjsg * engine_fake_wa_init(), a placeholder to program registers
22681bb76ff1Sjsg * which are not part of an official workaround defined by the
22691bb76ff1Sjsg * hardware team.
22701bb76ff1Sjsg * Programming those registers inside the workaround list allows
22711bb76ff1Sjsg * the wa framework to be used for proper application and verification.
22721bb76ff1Sjsg */
22731bb76ff1Sjsg static void
22741bb76ff1Sjsg engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
22751bb76ff1Sjsg {
22761bb76ff1Sjsg u8 mocs_w, mocs_r;
22771bb76ff1Sjsg
22781bb76ff1Sjsg /*
22791bb76ff1Sjsg * RING_CMD_CCTL specifies the default MOCS entry that will be used
22801bb76ff1Sjsg * by the command streamer when executing commands that don't have
22811bb76ff1Sjsg * a way to explicitly specify a MOCS setting. The default should
22821bb76ff1Sjsg * usually reference whichever MOCS entry corresponds to uncached
22831bb76ff1Sjsg * behavior, although use of a WB cached entry is recommended by the
22841bb76ff1Sjsg * spec in certain circumstances on specific platforms.
22851bb76ff1Sjsg */
22861bb76ff1Sjsg if (GRAPHICS_VER(engine->i915) >= 12) {
22871bb76ff1Sjsg mocs_r = engine->gt->mocs.uc_index;
22881bb76ff1Sjsg mocs_w = engine->gt->mocs.uc_index;
22891bb76ff1Sjsg
22901bb76ff1Sjsg if (HAS_L3_CCS_READ(engine->i915) &&
22911bb76ff1Sjsg engine->class == COMPUTE_CLASS) {
22921bb76ff1Sjsg mocs_r = engine->gt->mocs.wb_index;
22931bb76ff1Sjsg
22941bb76ff1Sjsg /*
22951bb76ff1Sjsg * Even on the few platforms where MOCS 0 is a
22961bb76ff1Sjsg * legitimate table entry, it's never the correct
22971bb76ff1Sjsg * setting to use here; we can assume the MOCS init
22981bb76ff1Sjsg * just forgot to initialize wb_index.
22991bb76ff1Sjsg */
23001bb76ff1Sjsg drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
23011bb76ff1Sjsg }
23021bb76ff1Sjsg
23031bb76ff1Sjsg wa_masked_field_set(wal,
23041bb76ff1Sjsg RING_CMD_CCTL(engine->mmio_base),
23051bb76ff1Sjsg CMD_CCTL_MOCS_MASK,
23061bb76ff1Sjsg CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
23071bb76ff1Sjsg }
23081bb76ff1Sjsg }
23091bb76ff1Sjsg
2310c349dbc7Sjsg static void
2311c349dbc7Sjsg rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2312c349dbc7Sjsg {
2313c349dbc7Sjsg struct drm_i915_private *i915 = engine->i915;
2314596b6869Sjsg struct intel_gt *gt = engine->gt;
2315c349dbc7Sjsg
2316596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2317596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
2318f005ef32Sjsg /* Wa_22014600077 */
2319f005ef32Sjsg wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2320f005ef32Sjsg ENABLE_EU_COUNT_FOR_TDL_FLUSH);
2321f005ef32Sjsg }
2322f005ef32Sjsg
2323596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2324596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2325d412e58aSjsg IS_DG2(i915)) {
2326f005ef32Sjsg /* Wa_1509727124 */
2327f005ef32Sjsg wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2328f005ef32Sjsg SC_DISABLE_POWER_OPTIMIZATION_EBB);
2329f005ef32Sjsg }
2330f005ef32Sjsg
2331596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2332d412e58aSjsg IS_DG2(i915)) {
2333f005ef32Sjsg /* Wa_22012856258 */
2334f005ef32Sjsg wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
2335f005ef32Sjsg GEN12_DISABLE_READ_SUPPRESSION);
23361bb76ff1Sjsg }
23371bb76ff1Sjsg
2338d412e58aSjsg if (IS_DG2(i915)) {
23391bb76ff1Sjsg /*
23401bb76ff1Sjsg * Wa_22010960976:dg2
23411bb76ff1Sjsg * Wa_14013347512:dg2
23421bb76ff1Sjsg */
2343f005ef32Sjsg wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
23441bb76ff1Sjsg LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
23451bb76ff1Sjsg }
23461bb76ff1Sjsg
23474186b845Sjsg if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
23484186b845Sjsg IS_DG2(i915)) {
23494186b845Sjsg /* Wa_14015150844 */
23504186b845Sjsg wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
23514186b845Sjsg _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
23524186b845Sjsg 0, true);
23534186b845Sjsg }
23544186b845Sjsg
2355d412e58aSjsg if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
23561bb76ff1Sjsg /* Wa_22014600077:dg2 */
2357f005ef32Sjsg wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
23581bb76ff1Sjsg _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
2359f005ef32Sjsg 0 /* Wa_14012342262 write-only reg, so skip verification */,
23601bb76ff1Sjsg true);
23611bb76ff1Sjsg }
23621bb76ff1Sjsg
236305f386cbSjsg if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
236405f386cbSjsg IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
236505f386cbSjsg /*
236605f386cbSjsg * Wa_1606700617:tgl,dg1,adl-p
236705f386cbSjsg * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
236805f386cbSjsg * Wa_14010826681:tgl,dg1,rkl,adl-p
236905f386cbSjsg * Wa_18019627453:dg2
237005f386cbSjsg */
237105f386cbSjsg wa_masked_en(wal,
237205f386cbSjsg GEN9_CS_DEBUG_MODE1,
237305f386cbSjsg FF_DOP_CLOCK_GATE_DISABLE);
237405f386cbSjsg }
237505f386cbSjsg
23765ca02815Sjsg if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
23775ca02815Sjsg IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
23785ca02815Sjsg /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
2379f005ef32Sjsg wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
2380c349dbc7Sjsg
2381ad8b1aafSjsg /*
2382ad8b1aafSjsg * Wa_1407928979:tgl A*
23835ca02815Sjsg * Wa_18011464164:tgl[B0+],dg1[B0+]
23845ca02815Sjsg * Wa_22010931296:tgl[B0+],dg1[B0+]
23855ca02815Sjsg * Wa_14010919138:rkl,dg1,adl-s,adl-p
2386ad8b1aafSjsg */
2387ad8b1aafSjsg wa_write_or(wal, GEN7_FF_THREAD_MODE,
2388ad8b1aafSjsg GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2389ad8b1aafSjsg
239005f386cbSjsg /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
239105f386cbSjsg wa_mcr_masked_en(wal,
239205f386cbSjsg GEN10_SAMPLER_MODE,
239305f386cbSjsg ENABLE_SMALLPL);
2394ad8b1aafSjsg }
2395108fd6d3Sjsg
23965ca02815Sjsg if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
23975ca02815Sjsg IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
2398f005ef32Sjsg /* Wa_1409804808 */
2399f005ef32Sjsg wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
24005ca02815Sjsg GEN12_PUSH_CONST_DEREF_HOLD_DIS);
24015ca02815Sjsg
2402f005ef32Sjsg /* Wa_14010229206 */
2403f005ef32Sjsg wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
24045ca02815Sjsg }
24055ca02815Sjsg
2406f005ef32Sjsg if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
24075ca02815Sjsg /*
2408f005ef32Sjsg * Wa_1607297627
24095ca02815Sjsg *
24105ca02815Sjsg * On TGL and RKL there are multiple entries for this WA in the
24115ca02815Sjsg * BSpec; some indicate this is an A0-only WA, others indicate
24125ca02815Sjsg 	 * it applies to all steppings, so we trust the "all steppings."
24135ca02815Sjsg */
24145ca02815Sjsg wa_masked_en(wal,
24151bb76ff1Sjsg RING_PSMI_CTL(RENDER_RING_BASE),
24165ca02815Sjsg GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
24175ca02815Sjsg GEN8_RC_SEMA_IDLE_MSG_DISABLE);
24185ca02815Sjsg }
24195ca02815Sjsg
24205ca02815Sjsg if (GRAPHICS_VER(i915) == 11) {
2421c349dbc7Sjsg 		/* This is not a Wa. Enable for better image quality */
2422c349dbc7Sjsg wa_masked_en(wal,
2423c349dbc7Sjsg _3D_CHICKEN3,
2424c349dbc7Sjsg _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
2425c349dbc7Sjsg
2426c349dbc7Sjsg /*
2427c349dbc7Sjsg * Wa_1405543622:icl
2428c349dbc7Sjsg * Formerly known as WaGAPZPriorityScheme
2429c349dbc7Sjsg */
2430c349dbc7Sjsg wa_write_or(wal,
2431c349dbc7Sjsg GEN8_GARBCNTL,
2432c349dbc7Sjsg GEN11_ARBITRATION_PRIO_ORDER_MASK);
2433c349dbc7Sjsg
2434c349dbc7Sjsg /*
2435c349dbc7Sjsg * Wa_1604223664:icl
2436c349dbc7Sjsg * Formerly known as WaL3BankAddressHashing
2437c349dbc7Sjsg */
24385ca02815Sjsg wa_write_clr_set(wal,
2439c349dbc7Sjsg GEN8_GARBCNTL,
2440c349dbc7Sjsg GEN11_HASH_CTRL_EXCL_MASK,
2441c349dbc7Sjsg GEN11_HASH_CTRL_EXCL_BIT0);
24425ca02815Sjsg wa_write_clr_set(wal,
2443c349dbc7Sjsg GEN11_GLBLINVL,
2444c349dbc7Sjsg GEN11_BANK_HASH_ADDR_EXCL_MASK,
2445c349dbc7Sjsg GEN11_BANK_HASH_ADDR_EXCL_BIT0);
2446c349dbc7Sjsg
2447c349dbc7Sjsg /*
2448c349dbc7Sjsg * Wa_1405733216:icl
2449c349dbc7Sjsg * Formerly known as WaDisableCleanEvicts
2450c349dbc7Sjsg */
2451f005ef32Sjsg wa_mcr_write_or(wal,
2452c349dbc7Sjsg GEN8_L3SQCREG4,
2453c349dbc7Sjsg GEN11_LQSC_CLEAN_EVICT_DISABLE);
2454c349dbc7Sjsg
2455c349dbc7Sjsg /* Wa_1606682166:icl */
2456c349dbc7Sjsg wa_write_or(wal,
2457c349dbc7Sjsg GEN7_SARCHKMD,
2458c349dbc7Sjsg GEN7_DISABLE_SAMPLER_PREFETCH);
2459c349dbc7Sjsg
2460c349dbc7Sjsg /* Wa_1409178092:icl */
2461f005ef32Sjsg wa_mcr_write_clr_set(wal,
2462c349dbc7Sjsg GEN11_SCRATCH2,
2463c349dbc7Sjsg GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
2464c349dbc7Sjsg 0);
2465c349dbc7Sjsg
2466c349dbc7Sjsg /* WaEnable32PlaneMode:icl */
2467c349dbc7Sjsg wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
2468c349dbc7Sjsg GEN11_ENABLE_32_PLANE_MODE);
2469c349dbc7Sjsg
2470c349dbc7Sjsg /*
2471c349dbc7Sjsg * Wa_1408767742:icl[a2..forever],ehl[all]
2472c349dbc7Sjsg * Wa_1605460711:icl[a0..c0]
2473c349dbc7Sjsg */
2474c349dbc7Sjsg wa_write_or(wal,
2475c349dbc7Sjsg GEN7_FF_THREAD_MODE,
2476c349dbc7Sjsg GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
2477ad8b1aafSjsg
24785ca02815Sjsg /* Wa_22010271021 */
2479ad8b1aafSjsg wa_masked_en(wal,
2480ad8b1aafSjsg GEN9_CS_DEBUG_MODE1,
2481ad8b1aafSjsg FF_DOP_CLOCK_GATE_DISABLE);
2482c349dbc7Sjsg }
2483c349dbc7Sjsg
2484f005ef32Sjsg /*
2485f005ef32Sjsg * Intel platforms that support fine-grained preemption (i.e., gen9 and
2486f005ef32Sjsg * beyond) allow the kernel-mode driver to choose between two different
2487f005ef32Sjsg * options for controlling preemption granularity and behavior.
2488f005ef32Sjsg *
2489f005ef32Sjsg * Option 1 (hardware default):
2490f005ef32Sjsg * Preemption settings are controlled in a global manner via
2491f005ef32Sjsg * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity
2492f005ef32Sjsg * and settings chosen by the kernel-mode driver will apply to all
2493f005ef32Sjsg * userspace clients.
2494f005ef32Sjsg *
2495f005ef32Sjsg * Option 2:
2496f005ef32Sjsg * Preemption settings are controlled on a per-context basis via
2497f005ef32Sjsg * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on
2498f005ef32Sjsg * context switch and is writable by userspace (e.g., via
2499f005ef32Sjsg * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
2500f005ef32Sjsg * which allows different userspace drivers/clients to select
2501f005ef32Sjsg * different settings, or to change those settings on the fly in
2502f005ef32Sjsg 	 * response to runtime needs. This option was known by the name
2503f005ef32Sjsg * "FtrPerCtxtPreemptionGranularityControl" at one time, although
2504f005ef32Sjsg * that name is somewhat misleading as other non-granularity
2505f005ef32Sjsg * preemption settings are also impacted by this decision.
2506f005ef32Sjsg *
2507f005ef32Sjsg * On Linux, our policy has always been to let userspace drivers
2508f005ef32Sjsg * control preemption granularity/settings (Option 2). This was
2509f005ef32Sjsg * originally mandatory on gen9 to prevent ABI breakage (old gen9
2510f005ef32Sjsg * userspace developed before object-level preemption was enabled would
2511f005ef32Sjsg * not behave well if i915 were to go with Option 1 and enable that
2512f005ef32Sjsg * preemption in a global manner). On gen9 each context would have
2513f005ef32Sjsg * object-level preemption disabled by default (see
2514f005ef32Sjsg * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
2515f005ef32Sjsg * userspace drivers could opt-in to object-level preemption as they
2516f005ef32Sjsg * saw fit. For post-gen9 platforms, we continue to utilize Option 2;
2517f005ef32Sjsg * even though it is no longer necessary for ABI compatibility when
2518f005ef32Sjsg * enabling a new platform, it does ensure that userspace will be able
2519f005ef32Sjsg * to implement any workarounds that show up requiring temporary
2520f005ef32Sjsg * adjustments to preemption behavior at runtime.
2521f005ef32Sjsg *
2522f005ef32Sjsg * Notes/Workarounds:
2523f005ef32Sjsg * - Wa_14015141709: On DG2 and early steppings of MTL,
2524f005ef32Sjsg * CS_CHICKEN1[0] does not disable object-level preemption as
2525f005ef32Sjsg * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
2526f005ef32Sjsg * using Option 1). Effectively this means userspace is unable
2527f005ef32Sjsg * to disable object-level preemption on these platforms/steppings
2528f005ef32Sjsg * despite the setting here.
2529f005ef32Sjsg *
2530f005ef32Sjsg * - Wa_16013994831: May require that userspace program
2531f005ef32Sjsg * CS_CHICKEN1[10] when certain runtime conditions are true.
2532f005ef32Sjsg * Userspace requires Option 2 to be in effect for their update of
2533f005ef32Sjsg * CS_CHICKEN1[10] to be effective.
2534f005ef32Sjsg *
2535f005ef32Sjsg * Other workarounds may appear in the future that will also require
2536f005ef32Sjsg * Option 2 behavior to allow proper userspace implementation.
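	 *
	 * An illustrative sketch of this Option-2 style programming (a
	 * client loading CS_CHICKEN1 via MI_LOAD_REGISTER_IMM from a batch)
	 * follows this function.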
2537f005ef32Sjsg */
2538f005ef32Sjsg if (GRAPHICS_VER(i915) >= 9)
2539c349dbc7Sjsg wa_masked_en(wal,
2540c349dbc7Sjsg GEN7_FF_SLICE_CS_CHICKEN1,
2541c349dbc7Sjsg GEN9_FFSC_PERCTX_PREEMPT_CTRL);
2542c349dbc7Sjsg
2543ad8b1aafSjsg if (IS_SKYLAKE(i915) ||
2544ad8b1aafSjsg IS_KABYLAKE(i915) ||
2545ad8b1aafSjsg IS_COFFEELAKE(i915) ||
2546ad8b1aafSjsg IS_COMETLAKE(i915)) {
2547c349dbc7Sjsg /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
2548c349dbc7Sjsg wa_write_or(wal,
2549c349dbc7Sjsg GEN8_GARBCNTL,
2550c349dbc7Sjsg GEN9_GAPS_TSV_CREDIT_DISABLE);
2551c349dbc7Sjsg }
2552c349dbc7Sjsg
2553c349dbc7Sjsg if (IS_BROXTON(i915)) {
2554c349dbc7Sjsg /* WaDisablePooledEuLoadBalancingFix:bxt */
2555c349dbc7Sjsg wa_masked_en(wal,
2556c349dbc7Sjsg FF_SLICE_CS_CHICKEN2,
2557c349dbc7Sjsg GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
2558c349dbc7Sjsg }
2559c349dbc7Sjsg
25605ca02815Sjsg if (GRAPHICS_VER(i915) == 9) {
2561c349dbc7Sjsg /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
2562c349dbc7Sjsg wa_masked_en(wal,
2563c349dbc7Sjsg GEN9_CSFE_CHICKEN1_RCS,
2564c349dbc7Sjsg GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
2565c349dbc7Sjsg
2566c349dbc7Sjsg /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
2567f005ef32Sjsg wa_mcr_write_or(wal,
2568c349dbc7Sjsg BDW_SCRATCH1,
2569c349dbc7Sjsg GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
2570c349dbc7Sjsg
2571c349dbc7Sjsg /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
2572c349dbc7Sjsg if (IS_GEN9_LP(i915))
2573f005ef32Sjsg wa_mcr_write_clr_set(wal,
2574c349dbc7Sjsg GEN8_L3SQCREG1,
2575c349dbc7Sjsg L3_PRIO_CREDITS_MASK,
2576c349dbc7Sjsg L3_GENERAL_PRIO_CREDITS(62) |
2577c349dbc7Sjsg L3_HIGH_PRIO_CREDITS(2));
2578c349dbc7Sjsg
2579c349dbc7Sjsg /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
2580f005ef32Sjsg wa_mcr_write_or(wal,
2581c349dbc7Sjsg GEN8_L3SQCREG4,
2582c349dbc7Sjsg GEN8_LQSC_FLUSH_COHERENT_LINES);
25835ca02815Sjsg
25845ca02815Sjsg /* Disable atomics in L3 to prevent unrecoverable hangs */
25855ca02815Sjsg wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
25865ca02815Sjsg GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2587f005ef32Sjsg wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
25885ca02815Sjsg GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
2589f005ef32Sjsg wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
25905ca02815Sjsg EVICTION_PERF_FIX_ENABLE, 0);
2591c349dbc7Sjsg }
2592c349dbc7Sjsg
25935ca02815Sjsg if (IS_HASWELL(i915)) {
25945ca02815Sjsg /* WaSampleCChickenBitEnable:hsw */
25955ca02815Sjsg wa_masked_en(wal,
2596f005ef32Sjsg HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
25975ca02815Sjsg
25985ca02815Sjsg wa_masked_dis(wal,
25995ca02815Sjsg CACHE_MODE_0_GEN7,
26005ca02815Sjsg /* enable HiZ Raw Stall Optimization */
26015ca02815Sjsg HIZ_RAW_STALL_OPT_DISABLE);
26025ca02815Sjsg }
26035ca02815Sjsg
26045ca02815Sjsg if (IS_VALLEYVIEW(i915)) {
26055ca02815Sjsg /* WaDisableEarlyCull:vlv */
26065ca02815Sjsg wa_masked_en(wal,
26075ca02815Sjsg _3D_CHICKEN3,
26085ca02815Sjsg _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
26095ca02815Sjsg
26105ca02815Sjsg /*
26115ca02815Sjsg * WaVSThreadDispatchOverride:ivb,vlv
26125ca02815Sjsg *
26135ca02815Sjsg * This actually overrides the dispatch
26145ca02815Sjsg * mode for all thread types.
26155ca02815Sjsg */
26165ca02815Sjsg wa_write_clr_set(wal,
26175ca02815Sjsg GEN7_FF_THREAD_MODE,
26185ca02815Sjsg GEN7_FF_SCHED_MASK,
26195ca02815Sjsg GEN7_FF_TS_SCHED_HW |
26205ca02815Sjsg GEN7_FF_VS_SCHED_HW |
26215ca02815Sjsg GEN7_FF_DS_SCHED_HW);
26225ca02815Sjsg
26235ca02815Sjsg /* WaPsdDispatchEnable:vlv */
26245ca02815Sjsg /* WaDisablePSDDualDispatchEnable:vlv */
26255ca02815Sjsg wa_masked_en(wal,
26265ca02815Sjsg GEN7_HALF_SLICE_CHICKEN1,
26275ca02815Sjsg GEN7_MAX_PS_THREAD_DEP |
26285ca02815Sjsg GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
26295ca02815Sjsg }
26305ca02815Sjsg
26315ca02815Sjsg if (IS_IVYBRIDGE(i915)) {
26325ca02815Sjsg /* WaDisableEarlyCull:ivb */
26335ca02815Sjsg wa_masked_en(wal,
26345ca02815Sjsg _3D_CHICKEN3,
26355ca02815Sjsg _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
26365ca02815Sjsg
26375ca02815Sjsg if (0) { /* causes HiZ corruption on ivb:gt1 */
26385ca02815Sjsg /* enable HiZ Raw Stall Optimization */
26395ca02815Sjsg wa_masked_dis(wal,
26405ca02815Sjsg CACHE_MODE_0_GEN7,
26415ca02815Sjsg HIZ_RAW_STALL_OPT_DISABLE);
26425ca02815Sjsg }
26435ca02815Sjsg
26445ca02815Sjsg /*
26455ca02815Sjsg * WaVSThreadDispatchOverride:ivb,vlv
26465ca02815Sjsg *
26475ca02815Sjsg * This actually overrides the dispatch
26485ca02815Sjsg * mode for all thread types.
26495ca02815Sjsg */
26505ca02815Sjsg wa_write_clr_set(wal,
26515ca02815Sjsg GEN7_FF_THREAD_MODE,
26525ca02815Sjsg GEN7_FF_SCHED_MASK,
26535ca02815Sjsg GEN7_FF_TS_SCHED_HW |
26545ca02815Sjsg GEN7_FF_VS_SCHED_HW |
26555ca02815Sjsg GEN7_FF_DS_SCHED_HW);
26565ca02815Sjsg
26575ca02815Sjsg /* WaDisablePSDDualDispatchEnable:ivb */
26585ca02815Sjsg if (IS_IVB_GT1(i915))
26595ca02815Sjsg wa_masked_en(wal,
26605ca02815Sjsg GEN7_HALF_SLICE_CHICKEN1,
26615ca02815Sjsg GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
26625ca02815Sjsg }
26635ca02815Sjsg
26645ca02815Sjsg if (GRAPHICS_VER(i915) == 7) {
2665c349dbc7Sjsg /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2666c349dbc7Sjsg wa_masked_en(wal,
26671bb76ff1Sjsg RING_MODE_GEN7(RENDER_RING_BASE),
2668c349dbc7Sjsg GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
2669c349dbc7Sjsg
26705ca02815Sjsg /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
26715ca02815Sjsg wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
26725ca02815Sjsg
26735ca02815Sjsg /*
26745ca02815Sjsg 		 * BSpec says this must be set even on VLV, although the
26755ca02815Sjsg 		 * WaDisable4x2SubspanOptimization:ivb,hsw
26765ca02815Sjsg 		 * workaround itself isn't listed for VLV.
26775ca02815Sjsg */
26785ca02815Sjsg wa_masked_en(wal,
26795ca02815Sjsg CACHE_MODE_1,
26805ca02815Sjsg PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
26815ca02815Sjsg
26825ca02815Sjsg /*
26835ca02815Sjsg * BSpec recommends 8x4 when MSAA is used,
26845ca02815Sjsg * however in practice 16x4 seems fastest.
26855ca02815Sjsg *
26865ca02815Sjsg * Note that PS/WM thread counts depend on the WIZ hashing
26875ca02815Sjsg * disable bit, which we don't touch here, but it's good
26885ca02815Sjsg * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
26895ca02815Sjsg */
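		/*
		 * wa_masked_field_set() emits a "masked" write: the new field
		 * value sits in the low 16 bits while the mask is replicated
		 * into the high 16 bits as per-bit write enables, i.e.
		 * _MASKED_FIELD(mask, value) == (mask << 16) | value, so the
		 * other fields of GEN7_GT_MODE are left untouched.
		 */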
26905ca02815Sjsg wa_masked_field_set(wal,
26915ca02815Sjsg GEN7_GT_MODE,
26925ca02815Sjsg GEN6_WIZ_HASHING_MASK,
26935ca02815Sjsg GEN6_WIZ_HASHING_16x4);
26945ca02815Sjsg }
26955ca02815Sjsg
26965ca02815Sjsg if (IS_GRAPHICS_VER(i915, 6, 7))
2697c349dbc7Sjsg /*
2698c349dbc7Sjsg * We need to disable the AsyncFlip performance optimisations in
2699c349dbc7Sjsg * order to use MI_WAIT_FOR_EVENT within the CS. It should
2700c349dbc7Sjsg * already be programmed to '1' on all products.
2701c349dbc7Sjsg *
2702c349dbc7Sjsg * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
2703c349dbc7Sjsg */
2704c349dbc7Sjsg wa_masked_en(wal,
27051bb76ff1Sjsg RING_MI_MODE(RENDER_RING_BASE),
2706c349dbc7Sjsg ASYNC_FLIP_PERF_DISABLE);
2707c349dbc7Sjsg
27085ca02815Sjsg if (GRAPHICS_VER(i915) == 6) {
2709c349dbc7Sjsg /*
2710c349dbc7Sjsg * Required for the hardware to program scanline values for
2711c349dbc7Sjsg * waiting
2712c349dbc7Sjsg * WaEnableFlushTlbInvalidationMode:snb
2713c349dbc7Sjsg */
2714c349dbc7Sjsg wa_masked_en(wal,
2715c349dbc7Sjsg GFX_MODE,
2716c349dbc7Sjsg GFX_TLB_INVALIDATE_EXPLICIT);
2717c349dbc7Sjsg
27185ca02815Sjsg /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
27195ca02815Sjsg wa_masked_en(wal,
27205ca02815Sjsg _3D_CHICKEN,
27215ca02815Sjsg _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
27225ca02815Sjsg
27235ca02815Sjsg wa_masked_en(wal,
27245ca02815Sjsg _3D_CHICKEN3,
27255ca02815Sjsg /* WaStripsFansDisableFastClipPerformanceFix:snb */
27265ca02815Sjsg _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
27275ca02815Sjsg /*
27285ca02815Sjsg * Bspec says:
27295ca02815Sjsg * "This bit must be set if 3DSTATE_CLIP clip mode is set
27305ca02815Sjsg * to normal and 3DSTATE_SF number of SF output attributes
27315ca02815Sjsg * is more than 16."
27325ca02815Sjsg */
27335ca02815Sjsg _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
27345ca02815Sjsg
27355ca02815Sjsg /*
27365ca02815Sjsg * BSpec recommends 8x4 when MSAA is used,
27375ca02815Sjsg * however in practice 16x4 seems fastest.
27385ca02815Sjsg *
27395ca02815Sjsg * Note that PS/WM thread counts depend on the WIZ hashing
27405ca02815Sjsg * disable bit, which we don't touch here, but it's good
27415ca02815Sjsg * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
27425ca02815Sjsg */
27435ca02815Sjsg wa_masked_field_set(wal,
27445ca02815Sjsg GEN6_GT_MODE,
27455ca02815Sjsg GEN6_WIZ_HASHING_MASK,
27465ca02815Sjsg GEN6_WIZ_HASHING_16x4);
27475ca02815Sjsg
27485ca02815Sjsg /* WaDisable_RenderCache_OperationalFlush:snb */
27495ca02815Sjsg wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
27505ca02815Sjsg
2751c349dbc7Sjsg /*
2752c349dbc7Sjsg * From the Sandybridge PRM, volume 1 part 3, page 24:
2753c349dbc7Sjsg * "If this bit is set, STCunit will have LRA as replacement
2754c349dbc7Sjsg * policy. [...] This bit must be reset. LRA replacement
2755c349dbc7Sjsg * policy is not supported."
2756c349dbc7Sjsg */
2757c349dbc7Sjsg wa_masked_dis(wal,
2758c349dbc7Sjsg CACHE_MODE_0,
2759c349dbc7Sjsg CM0_STC_EVICT_DISABLE_LRA_SNB);
2760c349dbc7Sjsg }
2761c349dbc7Sjsg
27625ca02815Sjsg if (IS_GRAPHICS_VER(i915, 4, 6))
2763c349dbc7Sjsg /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
27641bb76ff1Sjsg wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
2765c349dbc7Sjsg 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
2766c349dbc7Sjsg /* XXX bit doesn't stick on Broadwater */
27675ca02815Sjsg IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
2768ad8b1aafSjsg
27695ca02815Sjsg if (GRAPHICS_VER(i915) == 4)
2770ad8b1aafSjsg /*
2771ad8b1aafSjsg * Disable CONSTANT_BUFFER before it is loaded from the context
2772ad8b1aafSjsg 		 * image. As soon as it is loaded it is executed, and the stored
2773ad8b1aafSjsg * address may no longer be valid, leading to a GPU hang.
2774ad8b1aafSjsg *
2775ad8b1aafSjsg * This imposes the requirement that userspace reload their
2776ad8b1aafSjsg * CONSTANT_BUFFER on every batch, fortunately a requirement
2777ad8b1aafSjsg * they are already accustomed to from before contexts were
2778ad8b1aafSjsg * enabled.
2779ad8b1aafSjsg */
27801bb76ff1Sjsg wa_add(wal, ECOSKPD(RENDER_RING_BASE),
2781ad8b1aafSjsg 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
27825ca02815Sjsg 0 /* XXX bit doesn't stick on Broadwater */,
27835ca02815Sjsg true);
2784c349dbc7Sjsg }
2785c349dbc7Sjsg
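/*
 * Illustrative only: a minimal sketch of the Option-2 per-context preemption
 * programming discussed in rcs_engine_wa_init() above. A client selects its
 * preemption behaviour by loading the per-context CS_CHICKEN1 register
 * (0x2580) with an MI_LOAD_REGISTER_IMM; real userspace drivers emit the
 * equivalent dwords straight into their own batch buffers. The helper name
 * and the raw _MMIO(0x2580) lookup are assumptions made for this example,
 * not part of the driver.
 */
static int __maybe_unused example_emit_cs_chicken1(struct i915_request *rq,
						   u32 masked_value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(_MMIO(0x2580)); /* CS_CHICKEN1 */
	*cs++ = masked_value; /* masked write: the high word selects the bits to change */
	*cs++ = MI_NOOP; /* pad to keep the ring qword aligned */

	intel_ring_advance(rq, cs);
	return 0;
}
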
2786c349dbc7Sjsg static void
2787c349dbc7Sjsg xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2788c349dbc7Sjsg {
2789c349dbc7Sjsg struct drm_i915_private *i915 = engine->i915;
2790c349dbc7Sjsg
2791c349dbc7Sjsg /* WaKBLVECSSemaphoreWaitPoll:kbl */
2792f005ef32Sjsg if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
2793c349dbc7Sjsg wa_write(wal,
2794c349dbc7Sjsg RING_SEMA_WAIT_POLL(engine->mmio_base),
2795c349dbc7Sjsg 1);
2796c349dbc7Sjsg }
2797c349dbc7Sjsg }
2798c349dbc7Sjsg
2799c349dbc7Sjsg static void
28001bb76ff1Sjsg ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
28011bb76ff1Sjsg {
28021bb76ff1Sjsg if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
28031bb76ff1Sjsg /* Wa_14014999345:pvc */
2804f005ef32Sjsg wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
28051bb76ff1Sjsg }
28061bb76ff1Sjsg }
28071bb76ff1Sjsg
28081bb76ff1Sjsg /*
28091bb76ff1Sjsg * The bspec performance guide has recommended MMIO tuning settings. These
28101bb76ff1Sjsg * aren't truly "workarounds" but we want to program them with the same
28111bb76ff1Sjsg * workaround infrastructure to ensure that they're automatically added to
28121bb76ff1Sjsg * the GuC save/restore lists, re-applied at the right times, and checked for
28131bb76ff1Sjsg * any conflicting programming requested by real workarounds.
28141bb76ff1Sjsg *
28151bb76ff1Sjsg * Programming settings should be added here only if their registers are not
28161bb76ff1Sjsg * part of an engine's register state context. If a register is part of a
28171bb76ff1Sjsg * context, then any tuning settings should be programmed in an appropriate
28181bb76ff1Sjsg * function invoked by __intel_engine_init_ctx_wa().
28191bb76ff1Sjsg */
28201bb76ff1Sjsg static void
282113f2a72cSjsg add_render_compute_tuning_settings(struct intel_gt *gt,
28221bb76ff1Sjsg struct i915_wa_list *wal)
28231bb76ff1Sjsg {
282413f2a72cSjsg struct drm_i915_private *i915 = gt->i915;
282513f2a72cSjsg
28266e5fdd49Sjsg if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
2827f005ef32Sjsg wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
28281bb76ff1Sjsg
28291bb76ff1Sjsg /*
28301bb76ff1Sjsg * This tuning setting proves beneficial only on ATS-M designs; the
28311bb76ff1Sjsg * default "age based" setting is optimal on regular DG2 and other
28321bb76ff1Sjsg * platforms.
28331bb76ff1Sjsg */
28341bb76ff1Sjsg if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
2835f005ef32Sjsg wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
28361bb76ff1Sjsg THREAD_EX_ARB_MODE_RR_AFTER_DEP);
2837f005ef32Sjsg
2838f005ef32Sjsg if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
2839f005ef32Sjsg wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
28401bb76ff1Sjsg }
28411bb76ff1Sjsg
2842d29fdb3cSjsg static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
2843d29fdb3cSjsg {
2844d29fdb3cSjsg struct intel_gt *gt = engine->gt;
28456081a0efSjsg u32 mode;
2846d29fdb3cSjsg
2847d29fdb3cSjsg if (!IS_DG2(gt->i915))
2848d29fdb3cSjsg return;
2849d29fdb3cSjsg
2850d29fdb3cSjsg /*
2851d29fdb3cSjsg * Wa_14019159160: This workaround, along with others, leads to
2852d29fdb3cSjsg * significant challenges in utilizing load balancing among the
2853d29fdb3cSjsg * CCS slices. Consequently, an architectural decision has been
2854d29fdb3cSjsg * made to completely disable automatic CCS load balancing.
2855d29fdb3cSjsg */
2856d29fdb3cSjsg wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
28570fe191bfSjsg
28580fe191bfSjsg /*
28590fe191bfSjsg * After having disabled automatic load balancing we need to
28600fe191bfSjsg * assign all slices to a single CCS. We will call it CCS mode 1
28610fe191bfSjsg */
28626081a0efSjsg mode = intel_gt_apply_ccs_mode(gt);
28636081a0efSjsg wa_masked_en(wal, XEHP_CCS_MODE, mode);
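	/*
	 * Programming the mode through the wa list (rather than with a bare
	 * MMIO write) keeps it on the GuC save/restore list, so it survives
	 * engine resets that are outside of kernel driver control.
	 */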
2864d29fdb3cSjsg }
2865d29fdb3cSjsg
28661bb76ff1Sjsg /*
28671bb76ff1Sjsg * The workarounds in this function apply to shared registers in
28681bb76ff1Sjsg * the general render reset domain that aren't tied to a
28691bb76ff1Sjsg * specific engine. Since all render+compute engines get reset
28701bb76ff1Sjsg * together, and the contents of these registers are lost during
28711bb76ff1Sjsg * the shared render domain reset, we'll define such workarounds
28721bb76ff1Sjsg * here and then add them to just a single RCS or CCS engine's
28731bb76ff1Sjsg  * workaround list (whichever engine has the I915_ENGINE_FIRST_RENDER_COMPUTE flag).
28741bb76ff1Sjsg */
28751bb76ff1Sjsg static void
28761bb76ff1Sjsg general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
28771bb76ff1Sjsg {
28781bb76ff1Sjsg struct drm_i915_private *i915 = engine->i915;
2879596b6869Sjsg struct intel_gt *gt = engine->gt;
28801bb76ff1Sjsg
2881596b6869Sjsg add_render_compute_tuning_settings(gt, wal);
28821bb76ff1Sjsg
2883f005ef32Sjsg if (GRAPHICS_VER(i915) >= 11) {
2884f005ef32Sjsg 		/* This is not a Wa (although referred to as
2885f005ef32Sjsg 		 * WaSetInidrectStateOverride in places); it allows
2886f005ef32Sjsg * applications that reference sampler states through
2887f005ef32Sjsg * the BindlessSamplerStateBaseAddress to have their
2888f005ef32Sjsg * border color relative to DynamicStateBaseAddress
2889f005ef32Sjsg * rather than BindlessSamplerStateBaseAddress.
2890f005ef32Sjsg *
2891f005ef32Sjsg * Otherwise SAMPLER_STATE border colors have to be
2892f005ef32Sjsg * copied in multiple heaps (DynamicStateBaseAddress &
2893f005ef32Sjsg * BindlessSamplerStateBaseAddress)
2894f005ef32Sjsg *
2895f005ef32Sjsg * BSpec: 46052
2896f005ef32Sjsg */
2897f005ef32Sjsg wa_mcr_masked_en(wal,
2898f005ef32Sjsg GEN10_SAMPLER_MODE,
2899f005ef32Sjsg GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
2900f005ef32Sjsg }
2901f005ef32Sjsg
2902596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
29036e5fdd49Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
2904*3a253a7fSjsg IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) {
2905f005ef32Sjsg /* Wa_14017856879 */
2906f005ef32Sjsg wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
2907f005ef32Sjsg
2908*3a253a7fSjsg /* Wa_14020495402 */
2909*3a253a7fSjsg wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING);
2910*3a253a7fSjsg }
2911*3a253a7fSjsg
2912596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2913596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2914f005ef32Sjsg /*
2915f005ef32Sjsg * Wa_14017066071
2916f005ef32Sjsg * Wa_14017654203
2917f005ef32Sjsg */
2918f005ef32Sjsg wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
2919f005ef32Sjsg MTL_DISABLE_SAMPLER_SC_OOO);
2920f005ef32Sjsg
2921596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
2922f005ef32Sjsg /* Wa_22015279794 */
2923f005ef32Sjsg wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
2924f005ef32Sjsg DISABLE_PREFETCH_INTO_IC);
2925f005ef32Sjsg
2926596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2927596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2928d412e58aSjsg IS_DG2(i915)) {
2929f005ef32Sjsg /* Wa_22013037850 */
2930f005ef32Sjsg wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
2931f005ef32Sjsg DISABLE_128B_EVICTION_COMMAND_UDW);
293205f386cbSjsg
293305f386cbSjsg /* Wa_18017747507 */
293405f386cbSjsg wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
2935f005ef32Sjsg }
2936f005ef32Sjsg
2937596b6869Sjsg if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
2938596b6869Sjsg IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
2939f005ef32Sjsg IS_PONTEVECCHIO(i915) ||
2940f005ef32Sjsg IS_DG2(i915)) {
2941f005ef32Sjsg /* Wa_22014226127 */
2942f005ef32Sjsg wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
2943f005ef32Sjsg }
2944f005ef32Sjsg
294505f386cbSjsg if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
294605f386cbSjsg /* Wa_14015227452:dg2,pvc */
294705f386cbSjsg wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
294805f386cbSjsg
294905f386cbSjsg /* Wa_16015675438:dg2,pvc */
295005f386cbSjsg wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
295105f386cbSjsg }
295205f386cbSjsg
295305f386cbSjsg if (IS_DG2(i915)) {
295405f386cbSjsg /*
295505f386cbSjsg * Wa_16011620976:dg2_g11
295605f386cbSjsg * Wa_22015475538:dg2
295705f386cbSjsg */
295805f386cbSjsg wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
2959f005ef32Sjsg }
2960f005ef32Sjsg
2961d412e58aSjsg if (IS_DG2_G11(i915)) {
2962f005ef32Sjsg /*
2963f005ef32Sjsg * Wa_22012826095:dg2
2964f005ef32Sjsg * Wa_22013059131:dg2
2965f005ef32Sjsg */
2966f005ef32Sjsg wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
2967f005ef32Sjsg MAXREQS_PER_BANK,
2968f005ef32Sjsg REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
2969f005ef32Sjsg
2970f005ef32Sjsg /* Wa_22013059131:dg2 */
2971f005ef32Sjsg wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
2972f005ef32Sjsg FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
297305f386cbSjsg
297405f386cbSjsg /*
297505f386cbSjsg * Wa_22012654132
297605f386cbSjsg *
297705f386cbSjsg * Note that register 0xE420 is write-only and cannot be read
297805f386cbSjsg * back for verification on DG2 (due to Wa_14012342262), so
297905f386cbSjsg * we need to explicitly skip the readback.
298005f386cbSjsg */
298105f386cbSjsg wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
298205f386cbSjsg _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
298305f386cbSjsg 0 /* write-only, so skip validation */,
298405f386cbSjsg true);
2985f005ef32Sjsg }
2986f005ef32Sjsg
29871bb76ff1Sjsg if (IS_XEHPSDV(i915)) {
29881bb76ff1Sjsg /* Wa_1409954639 */
2989f005ef32Sjsg wa_mcr_masked_en(wal,
29901bb76ff1Sjsg GEN8_ROW_CHICKEN,
29911bb76ff1Sjsg SYSTOLIC_DOP_CLOCK_GATING_DIS);
29921bb76ff1Sjsg
29931bb76ff1Sjsg /* Wa_1607196519 */
2994f005ef32Sjsg wa_mcr_masked_en(wal,
29951bb76ff1Sjsg GEN9_ROW_CHICKEN4,
29961bb76ff1Sjsg GEN12_DISABLE_GRF_CLEAR);
29971bb76ff1Sjsg
29981bb76ff1Sjsg /* Wa_14010449647:xehpsdv */
2999f005ef32Sjsg wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
30001bb76ff1Sjsg GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
30011bb76ff1Sjsg }
30021bb76ff1Sjsg }
30031bb76ff1Sjsg
30041bb76ff1Sjsg static void
3005c349dbc7Sjsg engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
3006c349dbc7Sjsg {
3007f005ef32Sjsg if (GRAPHICS_VER(engine->i915) < 4)
3008c349dbc7Sjsg return;
3009c349dbc7Sjsg
30101bb76ff1Sjsg engine_fake_wa_init(engine, wal);
30111bb76ff1Sjsg
30121bb76ff1Sjsg /*
30131bb76ff1Sjsg 	 * These are common workarounds that just need to be applied
30141bb76ff1Sjsg * to a single RCS/CCS engine's workaround list since
30151bb76ff1Sjsg * they're reset as part of the general render domain reset.
30161bb76ff1Sjsg */
3017d29fdb3cSjsg if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
30181bb76ff1Sjsg general_render_compute_wa_init(engine, wal);
3019d29fdb3cSjsg ccs_engine_wa_mode(engine, wal);
3020d29fdb3cSjsg }
30211bb76ff1Sjsg
30221bb76ff1Sjsg if (engine->class == COMPUTE_CLASS)
30231bb76ff1Sjsg ccs_engine_wa_init(engine, wal);
30241bb76ff1Sjsg else if (engine->class == RENDER_CLASS)
3025c349dbc7Sjsg rcs_engine_wa_init(engine, wal);
3026c349dbc7Sjsg else
3027c349dbc7Sjsg xcs_engine_wa_init(engine, wal);
3028c349dbc7Sjsg }
3029c349dbc7Sjsg
3030c349dbc7Sjsg void intel_engine_init_workarounds(struct intel_engine_cs *engine)
3031c349dbc7Sjsg {
3032c349dbc7Sjsg struct i915_wa_list *wal = &engine->wa_list;
3033c349dbc7Sjsg
3034f005ef32Sjsg wa_init_start(wal, engine->gt, "engine", engine->name);
3035c349dbc7Sjsg engine_init_workarounds(engine, wal);
3036c349dbc7Sjsg wa_init_finish(wal);
3037c349dbc7Sjsg }
3038c349dbc7Sjsg
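/*
 * Replay the engine's workaround list against the hardware. This is how the
 * register values are reinstated whenever a reset has clobbered them.
 */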
3039c349dbc7Sjsg void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
3040c349dbc7Sjsg {
3041f005ef32Sjsg wa_list_apply(&engine->wa_list);
3042c349dbc7Sjsg }
3043c349dbc7Sjsg
30441bb76ff1Sjsg static const struct i915_range mcr_ranges_gen8[] = {
3045c349dbc7Sjsg { .start = 0x5500, .end = 0x55ff },
3046c349dbc7Sjsg { .start = 0x7000, .end = 0x7fff },
3047c349dbc7Sjsg { .start = 0x9400, .end = 0x97ff },
3048c349dbc7Sjsg { .start = 0xb000, .end = 0xb3ff },
3049c349dbc7Sjsg { .start = 0xe000, .end = 0xe7ff },
3050c349dbc7Sjsg {},
3051c349dbc7Sjsg };
3052c349dbc7Sjsg
30531bb76ff1Sjsg static const struct i915_range mcr_ranges_gen12[] = {
30545ca02815Sjsg { .start = 0x8150, .end = 0x815f },
30555ca02815Sjsg { .start = 0x9520, .end = 0x955f },
30565ca02815Sjsg { .start = 0xb100, .end = 0xb3ff },
30575ca02815Sjsg { .start = 0xde80, .end = 0xe8ff },
30585ca02815Sjsg { .start = 0x24a00, .end = 0x24a7f },
30595ca02815Sjsg {},
30605ca02815Sjsg };
30615ca02815Sjsg
30621bb76ff1Sjsg static const struct i915_range mcr_ranges_xehp[] = {
30635ca02815Sjsg { .start = 0x4000, .end = 0x4aff },
30645ca02815Sjsg { .start = 0x5200, .end = 0x52ff },
30655ca02815Sjsg { .start = 0x5400, .end = 0x7fff },
30665ca02815Sjsg { .start = 0x8140, .end = 0x815f },
30675ca02815Sjsg { .start = 0x8c80, .end = 0x8dff },
30685ca02815Sjsg { .start = 0x94d0, .end = 0x955f },
30695ca02815Sjsg { .start = 0x9680, .end = 0x96ff },
30705ca02815Sjsg { .start = 0xb000, .end = 0xb3ff },
30715ca02815Sjsg { .start = 0xc800, .end = 0xcfff },
30725ca02815Sjsg { .start = 0xd800, .end = 0xd8ff },
30735ca02815Sjsg { .start = 0xdc00, .end = 0xffff },
30745ca02815Sjsg { .start = 0x17000, .end = 0x17fff },
30755ca02815Sjsg { .start = 0x24a00, .end = 0x24a7f },
30765ca02815Sjsg {},
30775ca02815Sjsg };
30785ca02815Sjsg
3079c349dbc7Sjsg static bool mcr_range(struct drm_i915_private *i915, u32 offset)
3080c349dbc7Sjsg {
30811bb76ff1Sjsg const struct i915_range *mcr_ranges;
3082c349dbc7Sjsg int i;
3083c349dbc7Sjsg
30845ca02815Sjsg if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
30855ca02815Sjsg mcr_ranges = mcr_ranges_xehp;
30865ca02815Sjsg else if (GRAPHICS_VER(i915) >= 12)
30875ca02815Sjsg mcr_ranges = mcr_ranges_gen12;
30885ca02815Sjsg else if (GRAPHICS_VER(i915) >= 8)
30895ca02815Sjsg mcr_ranges = mcr_ranges_gen8;
30905ca02815Sjsg else
3091c349dbc7Sjsg return false;
3092c349dbc7Sjsg
3093c349dbc7Sjsg /*
3094c349dbc7Sjsg * Registers in these ranges are affected by the MCR selector
3095c349dbc7Sjsg * which only controls CPU initiated MMIO. Routing does not
3096c349dbc7Sjsg * work for CS access so we cannot verify them on this path.
3097c349dbc7Sjsg */
30985ca02815Sjsg for (i = 0; mcr_ranges[i].start; i++)
30995ca02815Sjsg if (offset >= mcr_ranges[i].start &&
31005ca02815Sjsg offset <= mcr_ranges[i].end)
3101c349dbc7Sjsg return true;
3102c349dbc7Sjsg
3103c349dbc7Sjsg return false;
3104c349dbc7Sjsg }
3105c349dbc7Sjsg
3106c349dbc7Sjsg static int
3107c349dbc7Sjsg wa_list_srm(struct i915_request *rq,
3108c349dbc7Sjsg const struct i915_wa_list *wal,
3109c349dbc7Sjsg struct i915_vma *vma)
3110c349dbc7Sjsg {
3111f005ef32Sjsg struct drm_i915_private *i915 = rq->i915;
3112c349dbc7Sjsg unsigned int i, count = 0;
3113c349dbc7Sjsg const struct i915_wa *wa;
3114c349dbc7Sjsg u32 srm, *cs;
3115c349dbc7Sjsg
3116c349dbc7Sjsg srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
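	/* gen8+ uses a 64-bit address, making the SRM command one dword longer */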
31175ca02815Sjsg if (GRAPHICS_VER(i915) >= 8)
3118c349dbc7Sjsg srm++;
3119c349dbc7Sjsg
3120c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3121c349dbc7Sjsg if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
3122c349dbc7Sjsg count++;
3123c349dbc7Sjsg }
3124c349dbc7Sjsg
3125c349dbc7Sjsg cs = intel_ring_begin(rq, 4 * count);
3126c349dbc7Sjsg if (IS_ERR(cs))
3127c349dbc7Sjsg return PTR_ERR(cs);
3128c349dbc7Sjsg
3129c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3130c349dbc7Sjsg u32 offset = i915_mmio_reg_offset(wa->reg);
3131c349dbc7Sjsg
3132c349dbc7Sjsg if (mcr_range(i915, offset))
3133c349dbc7Sjsg continue;
3134c349dbc7Sjsg
3135c349dbc7Sjsg *cs++ = srm;
3136c349dbc7Sjsg *cs++ = offset;
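		/* store at index i so results[] stays parallel to wal->list */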
3137c349dbc7Sjsg *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
3138c349dbc7Sjsg *cs++ = 0;
3139c349dbc7Sjsg }
3140c349dbc7Sjsg intel_ring_advance(rq, cs);
3141c349dbc7Sjsg
3142c349dbc7Sjsg return 0;
3143c349dbc7Sjsg }
3144c349dbc7Sjsg
3145c349dbc7Sjsg static int engine_wa_list_verify(struct intel_context *ce,
3146c349dbc7Sjsg const struct i915_wa_list * const wal,
3147c349dbc7Sjsg const char *from)
3148c349dbc7Sjsg {
3149c349dbc7Sjsg const struct i915_wa *wa;
3150c349dbc7Sjsg struct i915_request *rq;
3151c349dbc7Sjsg struct i915_vma *vma;
3152ad8b1aafSjsg struct i915_gem_ww_ctx ww;
3153c349dbc7Sjsg unsigned int i;
3154c349dbc7Sjsg u32 *results;
3155c349dbc7Sjsg int err;
3156c349dbc7Sjsg
3157c349dbc7Sjsg if (!wal->count)
3158c349dbc7Sjsg return 0;
3159c349dbc7Sjsg
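	/* one dword of GGTT scratch per workaround to receive the SRM results */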
31605ca02815Sjsg vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
31615ca02815Sjsg wal->count * sizeof(u32));
3162c349dbc7Sjsg if (IS_ERR(vma))
3163c349dbc7Sjsg return PTR_ERR(vma);
3164c349dbc7Sjsg
3165c349dbc7Sjsg intel_engine_pm_get(ce->engine);
3166ad8b1aafSjsg i915_gem_ww_ctx_init(&ww, false);
3167ad8b1aafSjsg retry:
3168ad8b1aafSjsg err = i915_gem_object_lock(vma->obj, &ww);
3169ad8b1aafSjsg if (err == 0)
3170ad8b1aafSjsg err = intel_context_pin_ww(ce, &ww);
3171ad8b1aafSjsg if (err)
3172ad8b1aafSjsg goto err_pm;
3173ad8b1aafSjsg
31745ca02815Sjsg err = i915_vma_pin_ww(vma, &ww, 0, 0,
31755ca02815Sjsg i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
31765ca02815Sjsg if (err)
31775ca02815Sjsg goto err_unpin;
31785ca02815Sjsg
3179ad8b1aafSjsg rq = i915_request_create(ce);
3180c349dbc7Sjsg if (IS_ERR(rq)) {
3181c349dbc7Sjsg err = PTR_ERR(rq);
31825ca02815Sjsg goto err_vma;
3183c349dbc7Sjsg }
3184c349dbc7Sjsg
3185c349dbc7Sjsg err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
3186ad8b1aafSjsg if (err == 0)
3187c349dbc7Sjsg err = wa_list_srm(rq, wal, vma);
3188c349dbc7Sjsg
3189c349dbc7Sjsg i915_request_get(rq);
3190ad8b1aafSjsg if (err)
3191ad8b1aafSjsg i915_request_set_error_once(rq, err);
3192c349dbc7Sjsg i915_request_add(rq);
3193ad8b1aafSjsg
3194ad8b1aafSjsg if (err)
3195ad8b1aafSjsg goto err_rq;
3196ad8b1aafSjsg
3197c349dbc7Sjsg if (i915_request_wait(rq, 0, HZ / 5) < 0) {
3198c349dbc7Sjsg err = -ETIME;
3199c349dbc7Sjsg goto err_rq;
3200c349dbc7Sjsg }
3201c349dbc7Sjsg
3202c349dbc7Sjsg results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
3203c349dbc7Sjsg if (IS_ERR(results)) {
3204c349dbc7Sjsg err = PTR_ERR(results);
3205c349dbc7Sjsg goto err_rq;
3206c349dbc7Sjsg }
3207c349dbc7Sjsg
3208c349dbc7Sjsg err = 0;
3209c349dbc7Sjsg for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
3210f005ef32Sjsg if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
3211c349dbc7Sjsg continue;
3212c349dbc7Sjsg
3213f005ef32Sjsg if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
3214c349dbc7Sjsg err = -ENXIO;
3215c349dbc7Sjsg }
3216c349dbc7Sjsg
3217c349dbc7Sjsg i915_gem_object_unpin_map(vma->obj);
3218c349dbc7Sjsg
3219c349dbc7Sjsg err_rq:
3220c349dbc7Sjsg i915_request_put(rq);
32215ca02815Sjsg err_vma:
32225ca02815Sjsg i915_vma_unpin(vma);
3223ad8b1aafSjsg err_unpin:
3224ad8b1aafSjsg intel_context_unpin(ce);
3225ad8b1aafSjsg err_pm:
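	/*
	 * -EDEADLK means the ww acquire lost an ordering race with another
	 * context; back off, dropping the locks we hold, and retry the whole
	 * sequence.
	 */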
3226ad8b1aafSjsg if (err == -EDEADLK) {
3227ad8b1aafSjsg err = i915_gem_ww_ctx_backoff(&ww);
3228ad8b1aafSjsg if (!err)
3229ad8b1aafSjsg goto retry;
3230ad8b1aafSjsg }
3231ad8b1aafSjsg i915_gem_ww_ctx_fini(&ww);
3232ad8b1aafSjsg intel_engine_pm_put(ce->engine);
3233c349dbc7Sjsg i915_vma_put(vma);
3234c349dbc7Sjsg return err;
3235c349dbc7Sjsg }
3236c349dbc7Sjsg
3237c349dbc7Sjsg int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
3238c349dbc7Sjsg const char *from)
3239c349dbc7Sjsg {
3240c349dbc7Sjsg return engine_wa_list_verify(engine->kernel_context,
3241c349dbc7Sjsg &engine->wa_list,
3242c349dbc7Sjsg from);
3243c349dbc7Sjsg }
3244c349dbc7Sjsg
3245c349dbc7Sjsg #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
3246c349dbc7Sjsg #include "selftest_workarounds.c"
3247c349dbc7Sjsg #endif
3248