xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_tlb.c (revision f005ef32267c16bdb134f0e9fa4477dbe07c263a)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"

/*
 * The HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
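/*
 * These feed the register-wait helpers below: the us value bounds the
 * initial busy-wait poll, the ms value the overall sleeping wait.
 */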

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
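/*
 * In both flavours, completion is signalled by the engine's "done" bits
 * reading back as zero.
 */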
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

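	/* The per-engine invalidation registers used here are gen8+ only. */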
	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

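	/*
	 * The OA unit has its own TLB that the per-engine writes above do not
	 * reach, so flush it explicitly on the affected platforms.
	 */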
	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug on Icelake, which cannot cope with
	 * too-rapid transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

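	/*
	 * Full invalidations advance the seqno by two, so completed barriers
	 * land on even values; ALIGN(seqno, 2) rounds a pending (odd) seqno
	 * up to the barrier that covers it.
	 */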
	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
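		/*
		 * Recheck under the lock: another thread may have completed
		 * a full invalidation covering this seqno while we waited.
		 */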
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

#ifdef notyet
		write_seqcount_invalidate(&gt->tlb.seqno);
#else
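		/*
		 * OpenBSD: the seqcount helpers are not ported; open-code the
		 * equivalent of write_seqcount_invalidate() by bumping the
		 * sequence by two behind a compiler barrier.
		 */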
		barrier();
		gt->tlb.seqno.seq.sequence += 2;
#endif
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

void intel_gt_init_tlb(struct intel_gt *gt)
{
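	/*
	 * OpenBSD: rw_init() stands in for mutex_init(); the DRM compat
	 * layer backs Linux mutexes with rwlocks.
	 */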
	rw_init(&gt->tlb.invalidate_lock, "gttlb");
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif