1 /* $NetBSD: intel_workarounds.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $ */
2
3 /*
4 * SPDX-License-Identifier: MIT
5 *
6 * Copyright © 2014-2018 Intel Corporation
7 */
8
9 #include <sys/cdefs.h>
10 __KERNEL_RCSID(0, "$NetBSD: intel_workarounds.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $");
11
12 #include "i915_drv.h"
13 #include "intel_context.h"
14 #include "intel_engine_pm.h"
15 #include "intel_gt.h"
16 #include "intel_ring.h"
17 #include "intel_workarounds.h"
18
19 #include <linux/nbsd-namespace.h>
20
21 /**
22 * DOC: Hardware workarounds
23 *
24 * This file is intended as a central place to implement most [1]_ of the
25 * required workarounds for hardware to work as originally intended. They fall
26 * in five basic categories depending on how/when they are applied:
27 *
28 * - Workarounds that touch registers that are saved/restored to/from the HW
29 * context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
31 * - GT workarounds. The list of these WAs is applied whenever these registers
32 * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
33 * - Display workarounds. The list is applied during display clock-gating
34 * initialization.
35 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
38 * registers).
39 * - Workaround batchbuffers, that get executed automatically by the hardware
40 * on every HW context restore.
41 *
42 * .. [1] Please notice that there are other WAs that, due to their nature,
43 * cannot be applied from a central place. Those are peppered around the rest
44 * of the code, as needed.
45 *
46 * .. [2] Technically, some registers are powercontext saved & restored, so they
47 * survive a suspend/resume. In practice, writing them again is not too
48 * costly and simplifies things. We can revisit this in the future.
49 *
50 * Layout
51 * ~~~~~~
52 *
53 * Keep things in this file ordered by WA type, as per the above (context, GT,
54 * display, register whitelist, batchbuffer). Then, inside each type, keep the
55 * following order:
56 *
57 * - Infrastructure functions and macros
58 * - WAs per platform in standard gen/chrono order
59 * - Public functions to init or apply the given workaround type.
60 */
61
static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
63 {
64 wal->name = name;
65 wal->engine_name = engine_name;
66 }
67
68 #define WA_LIST_CHUNK (1 << 4)
69
static void wa_init_finish(struct i915_wa_list *wal)
71 {
72 /* Trim unused entries. */
73 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
74 struct i915_wa *list = kmemdup(wal->list,
75 wal->count * sizeof(*list),
76 GFP_KERNEL);
77
78 if (list) {
79 kfree(wal->list);
80 wal->list = list;
81 }
82 }
83
84 if (!wal->count)
85 return;
86
87 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
88 wal->wa_count, wal->name, wal->engine_name);
89 }
90
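/*
 * Add @wa to @wal, keeping the list sorted by mmio offset.  The backing
 * array grows in WA_LIST_CHUNK-sized steps; if the register is already
 * present, the new mask/value/readback bits are merged into the existing
 * entry (and a clobbered previous value is reported via DRM_ERROR).
 */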
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
92 {
93 unsigned int addr = i915_mmio_reg_offset(wa->reg);
94 unsigned int start = 0, end = wal->count;
95 const unsigned int grow = WA_LIST_CHUNK;
96 struct i915_wa *wa_;
97
98 GEM_BUG_ON(!is_power_of_2(grow));
99
100 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
101 struct i915_wa *list;
102
103 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
104 GFP_KERNEL);
105 if (!list) {
106 DRM_ERROR("No space for workaround init!\n");
107 return;
108 }
109
110 if (wal->list)
111 memcpy(list, wal->list, sizeof(*wa) * wal->count);
112
113 wal->list = list;
114 }
115
116 while (start < end) {
117 unsigned int mid = start + (end - start) / 2;
118
119 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
120 start = mid + 1;
121 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
122 end = mid;
123 } else {
124 wa_ = &wal->list[mid];
125
126 if ((wa->mask & ~wa_->mask) == 0) {
127 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
128 i915_mmio_reg_offset(wa_->reg),
129 wa_->mask, wa_->val);
130
131 wa_->val &= ~wa->mask;
132 }
133
134 wal->wa_count++;
135 wa_->val |= wa->val;
136 wa_->mask |= wa->mask;
137 wa_->read |= wa->read;
138 return;
139 }
140 }
141
142 wal->wa_count++;
143 wa_ = &wal->list[wal->count++];
144 *wa_ = *wa;
145
146 while (wa_-- > wal->list) {
147 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
148 i915_mmio_reg_offset(wa_[1].reg));
149 if (i915_mmio_reg_offset(wa_[1].reg) >
150 i915_mmio_reg_offset(wa_[0].reg))
151 break;
152
153 swap(wa_[1], wa_[0]);
154 }
155 }
156
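/*
 * Helpers for filling in a struct i915_wa: @mask selects the bits to
 * modify, @val is the value written, and @read_mask is the subset of
 * bits that can later be verified by reading the register back.  The
 * *_masked_* variants are for registers taking masked writes, where the
 * upper 16 bits select which of the low 16 bits are actually updated.
 */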
static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val, u32 read_mask)
159 {
160 struct i915_wa wa = {
161 .reg = reg,
162 .mask = mask,
163 .val = val,
164 .read = read_mask,
165 };
166
167 _wa_add(wal, &wa);
168 }
169
static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val)
173 {
174 wa_add(wal, reg, mask, val, mask);
175 }
176
static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
179 {
180 wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
181 }
182
static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
185 {
186 wa_write_masked_or(wal, reg, ~0, val);
187 }
188
static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
191 {
192 wa_write_masked_or(wal, reg, val, val);
193 }
194
195 #define WA_SET_BIT_MASKED(addr, mask) \
196 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
197
198 #define WA_CLR_BIT_MASKED(addr, mask) \
199 wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
200
201 #define WA_SET_FIELD_MASKED(addr, mask, value) \
202 wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
203
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
206 {
207 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
208
209 /* WaDisableAsyncFlipPerfMode:bdw,chv */
210 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
211
212 /* WaDisablePartialInstShootdown:bdw,chv */
213 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
214 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
215
216 /* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
218 * invalidation occurs during a PSD flush.
219 */
220 /* WaForceEnableNonCoherent:bdw,chv */
221 /* WaHdcDisableFetchWhenMasked:bdw,chv */
222 WA_SET_BIT_MASKED(HDC_CHICKEN0,
223 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
224 HDC_FORCE_NON_COHERENT);
225
226 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
227 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
228 * polygons in the same 8x4 pixel/sample area to be processed without
229 * stalling waiting for the earlier ones to write to Hierarchical Z
230 * buffer."
231 *
232 * This optimization is off by default for BDW and CHV; turn it on.
233 */
234 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
235
236 /* Wa4x4STCOptimizationDisable:bdw,chv */
237 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
238
239 /*
240 * BSpec recommends 8x4 when MSAA is used,
241 * however in practice 16x4 seems fastest.
242 *
243 * Note that PS/WM thread counts depend on the WIZ hashing
244 * disable bit, which we don't touch here, but it's good
245 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
246 */
247 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
248 GEN6_WIZ_HASHING_MASK,
249 GEN6_WIZ_HASHING_16x4);
250 }
251
static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
254 {
255 struct drm_i915_private *i915 = engine->i915;
256
257 gen8_ctx_workarounds_init(engine, wal);
258
259 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
260 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
261
262 /* WaDisableDopClockGating:bdw
263 *
264 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
265 * to disable EUTC clock gating.
266 */
267 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
268 DOP_CLOCK_GATING_DISABLE);
269
270 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
271 GEN8_SAMPLER_POWER_BYPASS_DIS);
272
273 WA_SET_BIT_MASKED(HDC_CHICKEN0,
274 /* WaForceContextSaveRestoreNonCoherent:bdw */
275 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
276 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
277 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
278 }
279
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
282 {
283 gen8_ctx_workarounds_init(engine, wal);
284
285 /* WaDisableThreadStallDopClockGating:chv */
286 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
287
288 /* Improve HiZ throughput on CHV. */
289 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
290 }
291
static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
294 {
295 struct drm_i915_private *i915 = engine->i915;
296
297 if (HAS_LLC(i915)) {
298 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
299 *
300 * Must match Display Engine. See
301 * WaCompressedResourceDisplayNewHashMode.
302 */
303 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
304 GEN9_PBE_COMPRESSED_HASH_SELECTION);
305 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
306 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
307 }
308
309 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
310 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
311 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
312 FLOW_CONTROL_ENABLE |
313 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
314
315 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
316 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
317 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
318 GEN9_ENABLE_YV12_BUGFIX |
319 GEN9_ENABLE_GPGPU_PREEMPTION);
320
321 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
322 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
323 WA_SET_BIT_MASKED(CACHE_MODE_1,
324 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
325 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
326
327 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
328 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
329 GEN9_CCS_TLB_PREFETCH_ENABLE);
330
331 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
332 WA_SET_BIT_MASKED(HDC_CHICKEN0,
333 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
334 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
335
336 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
337 * both tied to WaForceContextSaveRestoreNonCoherent
338 * in some hsds for skl. We keep the tie for all gen9. The
339 * documentation is a bit hazy and so we want to get common behaviour,
340 * even though there is no clear evidence we would need both on kbl/bxt.
341 * This area has been source of system hangs so we play it safe
342 * and mimic the skl regardless of what bspec says.
343 *
344 * Use Force Non-Coherent whenever executing a 3D context. This
345 * is a workaround for a possible hang in the unlikely event
346 * a TLB invalidation occurs during a PSD flush.
347 */
348
349 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
350 WA_SET_BIT_MASKED(HDC_CHICKEN0,
351 HDC_FORCE_NON_COHERENT);
352
353 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
354 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
355 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
356 GEN8_SAMPLER_POWER_BYPASS_DIS);
357
358 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
359 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
360
361 /*
362 * Supporting preemption with fine-granularity requires changes in the
363 * batch buffer programming. Since we can't break old userspace, we
364 * need to set our default preemption level to safe value. Userspace is
365 * still able to use more fine-grained preemption levels, since in
366 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
367 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
368 * not real HW workarounds, but merely a way to start using preemption
369 * while maintaining old contract with userspace.
370 */
371
372 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
373 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
374
	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
376 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
377 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
378 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
379
380 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
381 if (IS_GEN9_LP(i915))
382 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
383 }
384
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
387 {
388 struct drm_i915_private *i915 = engine->i915;
389 u8 vals[3] = { 0, 0, 0 };
390 unsigned int i;
391
392 for (i = 0; i < 3; i++) {
393 u8 ss;
394
395 /*
396 * Only consider slices where one, and only one, subslice has 7
397 * EUs
398 */
399 if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
400 continue;
401
402 /*
403 * subslice_7eu[i] != 0 (because of the check above) and
404 * ss_max == 4 (maximum number of subslices possible per slice)
405 *
406 * -> 0 <= ss <= 3;
407 */
408 ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
409 vals[i] = 3 - ss;
410 }
411
412 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
413 return;
414
415 /* Tune IZ hashing. See intel_device_info_runtime_init() */
416 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
417 GEN9_IZ_HASHING_MASK(2) |
418 GEN9_IZ_HASHING_MASK(1) |
419 GEN9_IZ_HASHING_MASK(0),
420 GEN9_IZ_HASHING(2, vals[2]) |
421 GEN9_IZ_HASHING(1, vals[1]) |
422 GEN9_IZ_HASHING(0, vals[0]));
423 }
424
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
427 {
428 gen9_ctx_workarounds_init(engine, wal);
429 skl_tune_iz_hashing(engine, wal);
430 }
431
static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
434 {
435 gen9_ctx_workarounds_init(engine, wal);
436
437 /* WaDisableThreadStallDopClockGating:bxt */
438 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
439 STALL_DOP_GATING_DISABLE);
440
441 /* WaToEnableHwFixForPushConstHWBug:bxt */
442 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
443 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
444 }
445
static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
448 {
449 struct drm_i915_private *i915 = engine->i915;
450
451 gen9_ctx_workarounds_init(engine, wal);
452
453 /* WaToEnableHwFixForPushConstHWBug:kbl */
454 if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
455 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
456 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
457
458 /* WaDisableSbeCacheDispatchPortSharing:kbl */
459 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
460 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
461 }
462
static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
465 {
466 gen9_ctx_workarounds_init(engine, wal);
467
468 /* WaToEnableHwFixForPushConstHWBug:glk */
469 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
470 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
471 }
472
static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
475 {
476 gen9_ctx_workarounds_init(engine, wal);
477
478 /* WaToEnableHwFixForPushConstHWBug:cfl */
479 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
480 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
481
482 /* WaDisableSbeCacheDispatchPortSharing:cfl */
483 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
484 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
485 }
486
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
489 {
490 struct drm_i915_private *i915 = engine->i915;
491
492 /* WaForceContextSaveRestoreNonCoherent:cnl */
493 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
494 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
495
496 /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
497 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
498 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
499
500 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
501 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
502 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
503
504 /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
505 if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
506 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
507 GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
508
509 /* WaPushConstantDereferenceHoldDisable:cnl */
510 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
511
512 /* FtrEnableFastAnisoL1BankingFix:cnl */
513 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
514
515 /* WaDisable3DMidCmdPreemption:cnl */
516 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
517
518 /* WaDisableGPGPUMidCmdPreemption:cnl */
519 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
520 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
521 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
522
523 /* WaDisableEarlyEOT:cnl */
524 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
525 }
526
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
529 {
530 struct drm_i915_private *i915 = engine->i915;
531
532 /* WaDisableBankHangMode:icl */
533 wa_write(wal,
534 GEN8_L3CNTLREG,
535 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
536 GEN8_ERRDETBCTRL);
537
538 /* Wa_1604370585:icl (pre-prod)
539 * Formerly known as WaPushConstantDereferenceHoldDisable
540 */
541 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
542 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
543 PUSH_CONSTANT_DEREF_DISABLE);
544
545 /* WaForceEnableNonCoherent:icl
546 * This is not the same workaround as in early Gen9 platforms, where
547 * lacking this could cause system hangs, but coherency performance
548 * overhead is high and only a few compute workloads really need it
549 * (the register is whitelisted in hardware now, so UMDs can opt in
550 * for coherency if they have a good reason).
551 */
552 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
553
554 /* Wa_2006611047:icl (pre-prod)
555 * Formerly known as WaDisableImprovedTdlClkGating
556 */
557 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
558 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
559 GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
560
561 /* Wa_2006665173:icl (pre-prod) */
562 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
563 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
564 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
565
566 /* WaEnableFloatBlendOptimization:icl */
567 wa_write_masked_or(wal,
568 GEN10_CACHE_MODE_SS,
569 0, /* write-only, so skip validation */
570 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
571
572 /* WaDisableGPGPUMidThreadPreemption:icl */
573 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
574 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
575 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
576
577 /* allow headerless messages for preemptible GPGPU context */
578 WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
579 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
580 }
581
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
584 {
585 u32 val;
586
587 /* Wa_1409142259:tgl */
588 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
589 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
590
591 /* Wa_1604555607:tgl */
592 val = intel_uncore_read(engine->uncore, FF_MODE2);
593 val &= ~FF_MODE2_TDS_TIMER_MASK;
594 val |= FF_MODE2_TDS_TIMER_128;
595 /*
596 * FIXME: FF_MODE2 register is not readable till TGL B0. We can
597 * enable verification of WA from the later steppings, which enables
598 * the read of FF_MODE2.
599 */
600 wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
601 IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
602 FF_MODE2_TDS_TIMER_MASK);
603 }
604
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
609 {
610 struct drm_i915_private *i915 = engine->i915;
611
612 if (engine->class != RENDER_CLASS)
613 return;
614
615 wa_init_start(wal, name, engine->name);
616
617 if (IS_GEN(i915, 12))
618 tgl_ctx_workarounds_init(engine, wal);
619 else if (IS_GEN(i915, 11))
620 icl_ctx_workarounds_init(engine, wal);
621 else if (IS_CANNONLAKE(i915))
622 cnl_ctx_workarounds_init(engine, wal);
623 else if (IS_COFFEELAKE(i915))
624 cfl_ctx_workarounds_init(engine, wal);
625 else if (IS_GEMINILAKE(i915))
626 glk_ctx_workarounds_init(engine, wal);
627 else if (IS_KABYLAKE(i915))
628 kbl_ctx_workarounds_init(engine, wal);
629 else if (IS_BROXTON(i915))
630 bxt_ctx_workarounds_init(engine, wal);
631 else if (IS_SKYLAKE(i915))
632 skl_ctx_workarounds_init(engine, wal);
633 else if (IS_CHERRYVIEW(i915))
634 chv_ctx_workarounds_init(engine, wal);
635 else if (IS_BROADWELL(i915))
636 bdw_ctx_workarounds_init(engine, wal);
637 else if (INTEL_GEN(i915) < 8)
638 return;
639 else
640 MISSING_CASE(INTEL_GEN(i915));
641
642 wa_init_finish(wal);
643 }
644
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
646 {
647 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
648 }
649
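/*
 * Emit the context workaround list into @rq as a single
 * MI_LOAD_REGISTER_IMM packet, bracketed by flushes so the register
 * writes are not reordered against the rest of the request.
 */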
int intel_engine_emit_ctx_wa(struct i915_request *rq)
651 {
652 struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
653 struct i915_wa *wa;
654 unsigned int i;
655 u32 *cs;
656 int ret;
657
658 if (wal->count == 0)
659 return 0;
660
661 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
662 if (ret)
663 return ret;
664
665 cs = intel_ring_begin(rq, (wal->count * 2 + 2));
666 if (IS_ERR(cs))
667 return PTR_ERR(cs);
668
669 *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
670 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
671 *cs++ = i915_mmio_reg_offset(wa->reg);
672 *cs++ = wa->val;
673 }
674 *cs++ = MI_NOOP;
675
676 intel_ring_advance(rq, cs);
677
678 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
679 if (ret)
680 return ret;
681
682 return 0;
683 }
684
static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
687 {
688 /* WaDisableKillLogic:bxt,skl,kbl */
689 if (!IS_COFFEELAKE(i915))
690 wa_write_or(wal,
691 GAM_ECOCHK,
692 ECOCHK_DIS_TLB);
693
694 if (HAS_LLC(i915)) {
695 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
696 *
697 * Must match Display Engine. See
698 * WaCompressedResourceDisplayNewHashMode.
699 */
700 wa_write_or(wal,
701 MMCD_MISC_CTRL,
702 MMCD_PCLA | MMCD_HOTSPOT_EN);
703 }
704
705 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
706 wa_write_or(wal,
707 GAM_ECOCHK,
708 BDW_DISABLE_HDC_INVALIDATION);
709 }
710
static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
713 {
714 gen9_gt_workarounds_init(i915, wal);
715
716 /* WaDisableGafsUnitClkGating:skl */
717 wa_write_or(wal,
718 GEN7_UCGCTL4,
719 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
720
721 /* WaInPlaceDecompressionHang:skl */
722 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
723 wa_write_or(wal,
724 GEN9_GAMT_ECO_REG_RW_IA,
725 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
726 }
727
static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
730 {
731 gen9_gt_workarounds_init(i915, wal);
732
733 /* WaInPlaceDecompressionHang:bxt */
734 wa_write_or(wal,
735 GEN9_GAMT_ECO_REG_RW_IA,
736 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
737 }
738
static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
741 {
742 gen9_gt_workarounds_init(i915, wal);
743
744 /* WaDisableDynamicCreditSharing:kbl */
745 if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
746 wa_write_or(wal,
747 GAMT_CHKN_BIT_REG,
748 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
749
750 /* WaDisableGafsUnitClkGating:kbl */
751 wa_write_or(wal,
752 GEN7_UCGCTL4,
753 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
754
755 /* WaInPlaceDecompressionHang:kbl */
756 wa_write_or(wal,
757 GEN9_GAMT_ECO_REG_RW_IA,
758 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
759 }
760
static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
763 {
764 gen9_gt_workarounds_init(i915, wal);
765 }
766
static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
769 {
770 gen9_gt_workarounds_init(i915, wal);
771
772 /* WaDisableGafsUnitClkGating:cfl */
773 wa_write_or(wal,
774 GEN7_UCGCTL4,
775 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
776
777 /* WaInPlaceDecompressionHang:cfl */
778 wa_write_or(wal,
779 GEN9_GAMT_ECO_REG_RW_IA,
780 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
781 }
782
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
785 {
786 const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
787 unsigned int slice, subslice;
788 u32 l3_en, mcr, mcr_mask;
789
790 GEM_BUG_ON(INTEL_GEN(i915) < 10);
791
792 /*
793 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
794 * L3Banks could be fused off in single slice scenario. If that is
795 * the case, we might need to program MCR select to a valid L3Bank
796 * by default, to make sure we correctly read certain registers
797 * later on (in the range 0xB100 - 0xB3FF).
798 *
799 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
800 * Before any MMIO read into slice/subslice specific registers, MCR
801 * packet control register needs to be programmed to point to any
802 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
805 * are consistent across s/ss in almost all cases. In the rare
806 * occasions, such as INSTDONE, where this value is dependent
807 * on s/ss combo, the read should be done with read_subslice_reg.
808 *
809 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
810 * to which subslice, or to which L3 bank, the respective mmio reads
811 * will go, we have to find a common index which works for both
812 * accesses.
813 *
	 * The case where we cannot find a common index fortunately should not
815 * happen in production hardware, so we only emit a warning instead of
816 * implementing something more complex that requires checking the range
817 * of every MMIO read.
818 */
819
820 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
821 u32 l3_fuse =
822 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
823 GEN10_L3BANK_MASK;
824
825 DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
826 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
827 } else {
828 l3_en = ~0;
829 }
830
831 slice = fls(sseu->slice_mask) - 1;
832 subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
833 if (!subslice) {
834 DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
835 intel_sseu_get_subslices(sseu, slice), l3_en);
836 subslice = fls(l3_en);
837 WARN_ON(!subslice);
838 }
839 subslice--;
840
841 if (INTEL_GEN(i915) >= 11) {
842 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
843 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
844 } else {
845 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
846 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
847 }
848
849 DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
850
851 wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
852 }
853
static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
856 {
857 wa_init_mcr(i915, wal);
858
859 /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
860 if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
861 wa_write_or(wal,
862 GAMT_CHKN_BIT_REG,
863 GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
864
865 /* WaInPlaceDecompressionHang:cnl */
866 wa_write_or(wal,
867 GEN9_GAMT_ECO_REG_RW_IA,
868 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
869 }
870
static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
873 {
874 wa_init_mcr(i915, wal);
875
876 /* WaInPlaceDecompressionHang:icl */
877 wa_write_or(wal,
878 GEN9_GAMT_ECO_REG_RW_IA,
879 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
880
881 /* WaModifyGamTlbPartitioning:icl */
882 wa_write_masked_or(wal,
883 GEN11_GACB_PERF_CTRL,
884 GEN11_HASH_CTRL_MASK,
885 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
886
887 /* Wa_1405766107:icl
888 * Formerly known as WaCL2SFHalfMaxAlloc
889 */
890 wa_write_or(wal,
891 GEN11_LSN_UNSLCVC,
892 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
893 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
894
895 /* Wa_220166154:icl
896 * Formerly known as WaDisCtxReload
897 */
898 wa_write_or(wal,
899 GEN8_GAMW_ECO_DEV_RW_IA,
900 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
901
902 /* Wa_1405779004:icl (pre-prod) */
903 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
904 wa_write_or(wal,
905 SLICE_UNIT_LEVEL_CLKGATE,
906 MSCUNIT_CLKGATE_DIS);
907
908 /* Wa_1406680159:icl */
909 wa_write_or(wal,
910 SUBSLICE_UNIT_LEVEL_CLKGATE,
911 GWUNIT_CLKGATE_DIS);
912
913 /* Wa_1406838659:icl (pre-prod) */
914 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
915 wa_write_or(wal,
916 INF_UNIT_LEVEL_CLKGATE,
917 CGPSF_CLKGATE_DIS);
918
919 /* Wa_1406463099:icl
920 * Formerly known as WaGamTlbPendError
921 */
922 wa_write_or(wal,
923 GAMT_CHKN_BIT_REG,
924 GAMT_CHKN_DISABLE_L3_COH_PIPE);
925
926 /* Wa_1607087056:icl */
927 wa_write_or(wal,
928 SLICE_UNIT_LEVEL_CLKGATE,
929 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
930 }
931
static void
tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
934 {
935 /* Wa_1409420604:tgl */
936 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
937 wa_write_or(wal,
938 SUBSLICE_UNIT_LEVEL_CLKGATE2,
939 CPSSUNIT_CLKGATE_DIS);
940
941 /* Wa_1409180338:tgl */
942 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
943 wa_write_or(wal,
944 SLICE_UNIT_LEVEL_CLKGATE,
945 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
946 }
947
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
950 {
951 if (IS_GEN(i915, 12))
952 tgl_gt_workarounds_init(i915, wal);
953 else if (IS_GEN(i915, 11))
954 icl_gt_workarounds_init(i915, wal);
955 else if (IS_CANNONLAKE(i915))
956 cnl_gt_workarounds_init(i915, wal);
957 else if (IS_COFFEELAKE(i915))
958 cfl_gt_workarounds_init(i915, wal);
959 else if (IS_GEMINILAKE(i915))
960 glk_gt_workarounds_init(i915, wal);
961 else if (IS_KABYLAKE(i915))
962 kbl_gt_workarounds_init(i915, wal);
963 else if (IS_BROXTON(i915))
964 bxt_gt_workarounds_init(i915, wal);
965 else if (IS_SKYLAKE(i915))
966 skl_gt_workarounds_init(i915, wal);
967 else if (INTEL_GEN(i915) <= 8)
968 return;
969 else
970 MISSING_CASE(INTEL_GEN(i915));
971 }
972
void intel_gt_init_workarounds(struct drm_i915_private *i915)
974 {
975 struct i915_wa_list *wal = &i915->gt_wa_list;
976
977 wa_init_start(wal, "GT", "global");
978 gt_init_workarounds(i915, wal);
979 wa_init_finish(wal);
980 }
981
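/*
 * Compute the union of forcewake domains needed to read-modify-write
 * every register in @wal, so wa_list_apply() can grab them all at once.
 */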
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
984 {
985 enum forcewake_domains fw = 0;
986 struct i915_wa *wa;
987 unsigned int i;
988
989 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
990 fw |= intel_uncore_forcewake_for_reg(uncore,
991 wa->reg,
992 FW_REG_READ |
993 FW_REG_WRITE);
994
995 return fw;
996 }
997
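/*
 * Check that the bits covered by @wa->read still hold the expected value
 * in @cur; complain and return false if the workaround has been lost.
 */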
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
1000 {
1001 if ((cur ^ wa->val) & wa->read) {
1002 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
1003 name, from, i915_mmio_reg_offset(wa->reg),
1004 cur, cur & wa->read,
1005 wa->val, wa->mask);
1006
1007 return false;
1008 }
1009
1010 return true;
1011 }
1012
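/*
 * Apply every workaround in @wal with a read-modify-write of the target
 * register, under the uncore lock and with the required forcewake held.
 * On CONFIG_DRM_I915_DEBUG_GEM builds each write is verified immediately.
 */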
static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1015 {
1016 enum forcewake_domains fw;
1017 unsigned long flags;
1018 struct i915_wa *wa;
1019 unsigned int i;
1020
1021 if (!wal->count)
1022 return;
1023
1024 fw = wal_get_fw_for_rmw(uncore, wal);
1025
1026 spin_lock_irqsave(&uncore->lock, flags);
1027 intel_uncore_forcewake_get__locked(uncore, fw);
1028
1029 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1030 intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
1031 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1032 wa_verify(wa,
1033 intel_uncore_read_fw(uncore, wa->reg),
1034 wal->name, "application");
1035 }
1036
1037 intel_uncore_forcewake_put__locked(uncore, fw);
1038 spin_unlock_irqrestore(&uncore->lock, flags);
1039 }
1040
void intel_gt_apply_workarounds(struct intel_gt *gt)
1042 {
	wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
1044 }
1045
static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
1049 {
1050 struct i915_wa *wa;
1051 unsigned int i;
1052 bool ok = true;
1053
1054 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1055 ok &= wa_verify(wa,
1056 intel_uncore_read(uncore, wa->reg),
1057 wal->name, from);
1058
1059 return ok;
1060 }
1061
bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1063 {
	return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
1065 }
1066
static inline bool is_nonpriv_flags_valid(u32 flags)
1068 {
1069 /* Check only valid flag bits are set */
1070 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1071 return false;
1072
1073 /* NB: Only 3 out of 4 enum values are valid for access field */
1074 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1075 RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
1076 return false;
1077
1078 return true;
1079 }
1080
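/*
 * Record a register to be whitelisted for userspace access.  The access
 * flags are stashed in the low bits of the offset, ready to be written
 * verbatim into a RING_FORCE_TO_NONPRIV slot later on.
 */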
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1083 {
1084 struct i915_wa wa = {
1085 .reg = reg
1086 };
1087
1088 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1089 return;
1090
1091 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1092 return;
1093
1094 wa.reg.reg |= flags;
1095 _wa_add(wal, &wa);
1096 }
1097
static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1100 {
1101 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
1102 }
1103
static void gen9_whitelist_build(struct i915_wa_list *w)
1105 {
1106 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1107 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1108
1109 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1110 whitelist_reg(w, GEN8_CS_CHICKEN1);
1111
1112 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1113 whitelist_reg(w, GEN8_HDC_CHICKEN1);
1114
1115 /* WaSendPushConstantsFromMMIO:skl,bxt */
1116 whitelist_reg(w, COMMON_SLICE_CHICKEN2);
1117 }
1118
static void skl_whitelist_build(struct intel_engine_cs *engine)
1120 {
1121 struct i915_wa_list *w = &engine->whitelist;
1122
1123 if (engine->class != RENDER_CLASS)
1124 return;
1125
1126 gen9_whitelist_build(w);
1127
1128 /* WaDisableLSQCROPERFforOCL:skl */
1129 whitelist_reg(w, GEN8_L3SQCREG4);
1130 }
1131
static void bxt_whitelist_build(struct intel_engine_cs *engine)
1133 {
1134 if (engine->class != RENDER_CLASS)
1135 return;
1136
1137 gen9_whitelist_build(&engine->whitelist);
1138 }
1139
static void kbl_whitelist_build(struct intel_engine_cs *engine)
1141 {
1142 struct i915_wa_list *w = &engine->whitelist;
1143
1144 if (engine->class != RENDER_CLASS)
1145 return;
1146
1147 gen9_whitelist_build(w);
1148
1149 /* WaDisableLSQCROPERFforOCL:kbl */
1150 whitelist_reg(w, GEN8_L3SQCREG4);
1151 }
1152
static void glk_whitelist_build(struct intel_engine_cs *engine)
1154 {
1155 struct i915_wa_list *w = &engine->whitelist;
1156
1157 if (engine->class != RENDER_CLASS)
1158 return;
1159
1160 gen9_whitelist_build(w);
1161
1162 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1163 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1164 }
1165
static void cfl_whitelist_build(struct intel_engine_cs *engine)
1167 {
1168 struct i915_wa_list *w = &engine->whitelist;
1169
1170 if (engine->class != RENDER_CLASS)
1171 return;
1172
1173 gen9_whitelist_build(w);
1174
1175 /*
1176 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1177 *
	 * This covers 4 registers which are next to one another :
1179 * - PS_INVOCATION_COUNT
1180 * - PS_INVOCATION_COUNT_UDW
1181 * - PS_DEPTH_COUNT
1182 * - PS_DEPTH_COUNT_UDW
1183 */
1184 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1185 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1186 RING_FORCE_TO_NONPRIV_RANGE_4);
1187 }
1188
static void cnl_whitelist_build(struct intel_engine_cs *engine)
1190 {
1191 struct i915_wa_list *w = &engine->whitelist;
1192
1193 if (engine->class != RENDER_CLASS)
1194 return;
1195
1196 /* WaEnablePreemptionGranularityControlByUMD:cnl */
1197 whitelist_reg(w, GEN8_CS_CHICKEN1);
1198 }
1199
static void icl_whitelist_build(struct intel_engine_cs *engine)
1201 {
1202 struct i915_wa_list *w = &engine->whitelist;
1203
1204 switch (engine->class) {
1205 case RENDER_CLASS:
1206 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1207 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1208
1209 /* WaAllowUMDToModifySamplerMode:icl */
1210 whitelist_reg(w, GEN10_SAMPLER_MODE);
1211
1212 /* WaEnableStateCacheRedirectToCS:icl */
1213 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1214
1215 /*
1216 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1217 *
	 * This covers 4 registers which are next to one another :
1219 * - PS_INVOCATION_COUNT
1220 * - PS_INVOCATION_COUNT_UDW
1221 * - PS_DEPTH_COUNT
1222 * - PS_DEPTH_COUNT_UDW
1223 */
1224 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1225 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1226 RING_FORCE_TO_NONPRIV_RANGE_4);
1227 break;
1228
1229 case VIDEO_DECODE_CLASS:
1230 /* hucStatusRegOffset */
1231 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1232 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1233 /* hucUKernelHdrInfoRegOffset */
1234 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1235 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1236 /* hucStatus2RegOffset */
1237 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1238 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1239 break;
1240
1241 default:
1242 break;
1243 }
1244 }
1245
static void tgl_whitelist_build(struct intel_engine_cs *engine)
1247 {
1248 struct i915_wa_list *w = &engine->whitelist;
1249
1250 switch (engine->class) {
1251 case RENDER_CLASS:
1252 /*
1253 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1254 *
1255 * This covers 4 registers which are next to one another :
1256 * - PS_INVOCATION_COUNT
1257 * - PS_INVOCATION_COUNT_UDW
1258 * - PS_DEPTH_COUNT
1259 * - PS_DEPTH_COUNT_UDW
1260 */
1261 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1262 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1263 RING_FORCE_TO_NONPRIV_RANGE_4);
1264 break;
1265 default:
1266 break;
1267 }
1268 }
1269
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1271 {
1272 struct drm_i915_private *i915 = engine->i915;
1273 struct i915_wa_list *w = &engine->whitelist;
1274
1275 wa_init_start(w, "whitelist", engine->name);
1276
1277 if (IS_GEN(i915, 12))
1278 tgl_whitelist_build(engine);
1279 else if (IS_GEN(i915, 11))
1280 icl_whitelist_build(engine);
1281 else if (IS_CANNONLAKE(i915))
1282 cnl_whitelist_build(engine);
1283 else if (IS_COFFEELAKE(i915))
1284 cfl_whitelist_build(engine);
1285 else if (IS_GEMINILAKE(i915))
1286 glk_whitelist_build(engine);
1287 else if (IS_KABYLAKE(i915))
1288 kbl_whitelist_build(engine);
1289 else if (IS_BROXTON(i915))
1290 bxt_whitelist_build(engine);
1291 else if (IS_SKYLAKE(i915))
1292 skl_whitelist_build(engine);
1293 else if (INTEL_GEN(i915) <= 8)
1294 return;
1295 else
1296 MISSING_CASE(INTEL_GEN(i915));
1297
1298 wa_init_finish(w);
1299 }
1300
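/*
 * Program the engine's RING_FORCE_TO_NONPRIV slots from the whitelist,
 * and point any unused slots at the harmless RING_NOPID register.
 */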
void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1302 {
1303 const struct i915_wa_list *wal = &engine->whitelist;
1304 struct intel_uncore *uncore = engine->uncore;
1305 const u32 base = engine->mmio_base;
1306 struct i915_wa *wa;
1307 unsigned int i;
1308
1309 if (!wal->count)
1310 return;
1311
1312 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1313 intel_uncore_write(uncore,
1314 RING_FORCE_TO_NONPRIV(base, i),
1315 i915_mmio_reg_offset(wa->reg));
1316
1317 /* And clear the rest just in case of garbage */
1318 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1319 intel_uncore_write(uncore,
1320 RING_FORCE_TO_NONPRIV(base, i),
1321 i915_mmio_reg_offset(RING_NOPID(base)));
1322 }
1323
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1326 {
1327 struct drm_i915_private *i915 = engine->i915;
1328
1329 if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
1330 /* Wa_1606700617:tgl */
1331 wa_masked_en(wal,
1332 GEN9_CS_DEBUG_MODE1,
1333 FF_DOP_CLOCK_GATE_DISABLE);
1334
1335 /* Wa_1607138336:tgl */
1336 wa_write_or(wal,
1337 GEN9_CTX_PREEMPT_REG,
1338 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1339
1340 /* Wa_1607030317:tgl */
1341 /* Wa_1607186500:tgl */
1342 /* Wa_1607297627:tgl */
1343 wa_masked_en(wal,
1344 GEN6_RC_SLEEP_PSMI_CONTROL,
1345 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1346 GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1347
1348 /*
1349 * Wa_1606679103:tgl
1350 * (see also Wa_1606682166:icl)
1351 */
1352 wa_write_or(wal,
1353 GEN7_SARCHKMD,
1354 GEN7_DISABLE_SAMPLER_PREFETCH);
1355 }
1356
1357 if (IS_GEN(i915, 11)) {
		/* This is not a Wa. Enable for better image quality */
1359 wa_masked_en(wal,
1360 _3D_CHICKEN3,
1361 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1362
1363 /* WaPipelineFlushCoherentLines:icl */
1364 wa_write_or(wal,
1365 GEN8_L3SQCREG4,
1366 GEN8_LQSC_FLUSH_COHERENT_LINES);
1367
1368 /*
1369 * Wa_1405543622:icl
1370 * Formerly known as WaGAPZPriorityScheme
1371 */
1372 wa_write_or(wal,
1373 GEN8_GARBCNTL,
1374 GEN11_ARBITRATION_PRIO_ORDER_MASK);
1375
1376 /*
1377 * Wa_1604223664:icl
1378 * Formerly known as WaL3BankAddressHashing
1379 */
1380 wa_write_masked_or(wal,
1381 GEN8_GARBCNTL,
1382 GEN11_HASH_CTRL_EXCL_MASK,
1383 GEN11_HASH_CTRL_EXCL_BIT0);
1384 wa_write_masked_or(wal,
1385 GEN11_GLBLINVL,
1386 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1387 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1388
1389 /*
1390 * Wa_1405733216:icl
1391 * Formerly known as WaDisableCleanEvicts
1392 */
1393 wa_write_or(wal,
1394 GEN8_L3SQCREG4,
1395 GEN11_LQSC_CLEAN_EVICT_DISABLE);
1396
1397 /* WaForwardProgressSoftReset:icl */
1398 wa_write_or(wal,
1399 GEN10_SCRATCH_LNCF2,
1400 PMFLUSHDONE_LNICRSDROP |
1401 PMFLUSH_GAPL3UNBLOCK |
1402 PMFLUSHDONE_LNEBLK);
1403
1404 /* Wa_1406609255:icl (pre-prod) */
1405 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1406 wa_write_or(wal,
1407 GEN7_SARCHKMD,
1408 GEN7_DISABLE_DEMAND_PREFETCH);
1409
1410 /* Wa_1606682166:icl */
1411 wa_write_or(wal,
1412 GEN7_SARCHKMD,
1413 GEN7_DISABLE_SAMPLER_PREFETCH);
1414
1415 /* Wa_1409178092:icl */
1416 wa_write_masked_or(wal,
1417 GEN11_SCRATCH2,
1418 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1419 0);
1420 }
1421
1422 if (IS_GEN_RANGE(i915, 9, 11)) {
1423 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1424 wa_masked_en(wal,
1425 GEN7_FF_SLICE_CS_CHICKEN1,
1426 GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1427 }
1428
1429 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1430 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1431 wa_write_or(wal,
1432 GEN8_GARBCNTL,
1433 GEN9_GAPS_TSV_CREDIT_DISABLE);
1434 }
1435
1436 if (IS_BROXTON(i915)) {
1437 /* WaDisablePooledEuLoadBalancingFix:bxt */
1438 wa_masked_en(wal,
1439 FF_SLICE_CS_CHICKEN2,
1440 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1441 }
1442
1443 if (IS_GEN(i915, 9)) {
1444 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1445 wa_masked_en(wal,
1446 GEN9_CSFE_CHICKEN1_RCS,
1447 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1448
1449 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1450 wa_write_or(wal,
1451 BDW_SCRATCH1,
1452 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1453
1454 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1455 if (IS_GEN9_LP(i915))
1456 wa_write_masked_or(wal,
1457 GEN8_L3SQCREG1,
1458 L3_PRIO_CREDITS_MASK,
1459 L3_GENERAL_PRIO_CREDITS(62) |
1460 L3_HIGH_PRIO_CREDITS(2));
1461
1462 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1463 wa_write_or(wal,
1464 GEN8_L3SQCREG4,
1465 GEN8_LQSC_FLUSH_COHERENT_LINES);
1466 }
1467 }
1468
static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1471 {
1472 struct drm_i915_private *i915 = engine->i915;
1473
1474 /* WaKBLVECSSemaphoreWaitPoll:kbl */
1475 if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1476 wa_write(wal,
1477 RING_SEMA_WAIT_POLL(engine->mmio_base),
1478 1);
1479 }
1480 }
1481
static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1484 {
1485 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1486 return;
1487
1488 if (engine->class == RENDER_CLASS)
1489 rcs_engine_wa_init(engine, wal);
1490 else
1491 xcs_engine_wa_init(engine, wal);
1492 }
1493
void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1495 {
1496 struct i915_wa_list *wal = &engine->wa_list;
1497
1498 if (INTEL_GEN(engine->i915) < 8)
1499 return;
1500
1501 wa_init_start(wal, "engine", engine->name);
1502 engine_init_workarounds(engine, wal);
1503 wa_init_finish(wal);
1504 }
1505
void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1507 {
1508 wa_list_apply(engine->uncore, &engine->wa_list);
1509 }
1510
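/*
 * Allocate and pin an internal object large enough to hold @count u32
 * results; used as the destination buffer when reading workaround
 * registers back from the command streamer.
 */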
static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
1513 {
1514 struct drm_i915_gem_object *obj;
1515 struct i915_vma *vma;
1516 unsigned int size;
1517 int err;
1518
1519 size = round_up(count * sizeof(u32), PAGE_SIZE);
1520 obj = i915_gem_object_create_internal(vm->i915, size);
1521 if (IS_ERR(obj))
1522 return ERR_CAST(obj);
1523
1524 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1525
1526 vma = i915_vma_instance(obj, vm, NULL);
1527 if (IS_ERR(vma)) {
1528 err = PTR_ERR(vma);
1529 goto err_obj;
1530 }
1531
1532 err = i915_vma_pin(vma, 0, 0,
1533 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1534 if (err)
1535 goto err_obj;
1536
1537 return vma;
1538
1539 err_obj:
1540 i915_gem_object_put(obj);
1541 return ERR_PTR(err);
1542 }
1543
static bool mcr_range(struct drm_i915_private *i915, u32 offset)
1545 {
1546 /*
1547 * Registers in this range are affected by the MCR selector
1548 * which only controls CPU initiated MMIO. Routing does not
1549 * work for CS access so we cannot verify them on this path.
1550 */
1551 if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
1552 return true;
1553
1554 return false;
1555 }
1556
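/*
 * Emit one MI_STORE_REGISTER_MEM per workaround register (skipping the
 * MCR-steered range) so the values observed by the command streamer are
 * captured into @vma for later comparison.
 */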
static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
1561 {
1562 struct drm_i915_private *i915 = rq->i915;
1563 unsigned int i, count = 0;
1564 const struct i915_wa *wa;
1565 u32 srm, *cs;
1566
1567 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1568 if (INTEL_GEN(i915) >= 8)
1569 srm++;
1570
1571 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1572 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
1573 count++;
1574 }
1575
1576 cs = intel_ring_begin(rq, 4 * count);
1577 if (IS_ERR(cs))
1578 return PTR_ERR(cs);
1579
1580 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1581 u32 offset = i915_mmio_reg_offset(wa->reg);
1582
1583 if (mcr_range(i915, offset))
1584 continue;
1585
1586 *cs++ = srm;
1587 *cs++ = offset;
1588 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1589 *cs++ = 0;
1590 }
1591 intel_ring_advance(rq, cs);
1592
1593 return 0;
1594 }
1595
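/*
 * Submit a request on @ce that reads the engine workaround registers back
 * via SRM into a scratch buffer, then compare the results against the
 * expected values.  Returns -ENXIO if any workaround has been lost.
 */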
static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
1599 {
1600 const struct i915_wa *wa;
1601 struct i915_request *rq;
1602 struct i915_vma *vma;
1603 unsigned int i;
1604 u32 *results;
1605 int err;
1606
1607 if (!wal->count)
1608 return 0;
1609
1610 vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
1611 if (IS_ERR(vma))
1612 return PTR_ERR(vma);
1613
1614 intel_engine_pm_get(ce->engine);
1615 rq = intel_context_create_request(ce);
1616 intel_engine_pm_put(ce->engine);
1617 if (IS_ERR(rq)) {
1618 err = PTR_ERR(rq);
1619 goto err_vma;
1620 }
1621
1622 err = wa_list_srm(rq, wal, vma);
1623 if (err)
1624 goto err_vma;
1625
1626 i915_request_get(rq);
1627 i915_request_add(rq);
1628 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
1629 err = -ETIME;
1630 goto err_rq;
1631 }
1632
1633 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1634 if (IS_ERR(results)) {
1635 err = PTR_ERR(results);
1636 goto err_rq;
1637 }
1638
1639 err = 0;
1640 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1641 if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
1642 continue;
1643
1644 if (!wa_verify(wa, results[i], wal->name, from))
1645 err = -ENXIO;
1646 }
1647
1648 i915_gem_object_unpin_map(vma->obj);
1649
1650 err_rq:
1651 i915_request_put(rq);
1652 err_vma:
1653 i915_vma_unpin(vma);
1654 i915_vma_put(vma);
1655 return err;
1656 }
1657
int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
1660 {
1661 return engine_wa_list_verify(engine->kernel_context,
1662 &engine->wa_list,
1663 from);
1664 }
1665
1666 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1667 #include "selftest_workarounds.c"
1668 #endif
1669