// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining the frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

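/*
 * Substituted for the real RPS worker while a test runs, so that any
 * pending up/down interrupt work is discarded and the tests alone
 * decide the frequency (the original worker is restored afterwards).
 */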
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

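/*
 * Build a batch that busy-spins on the command streamer: the COUNT and
 * INC GPRs are zeroed, INC is set to 1, and an unrolled MI_MATH loop
 * repeatedly adds INC into COUNT. With @srm the running count is also
 * written to the last dword of the buffer so the CPU can sample it.
 * The closing MI_BATCH_BUFFER_START jumps back to the loop entry;
 * @cancel points at that entry, so overwriting it with
 * MI_BATCH_BUFFER_END terminates the spinner.
 */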
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

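/*
 * Poll the actual frequency (CAGF) until it reaches @freq, stops moving
 * (no change observed across the last 64 samples), or @timeout_ms
 * expires, with an exponentially growing sleep between samples;
 * returns the last frequency read.
 */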
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

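/*
 * Request @freq and wait for the hardware to settle: returns the
 * frequency actually reached, or 0 if the request could not even be
 * programmed within 50ms.
 */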
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

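/*
 * Sanity check the GT clock: with RPS disabled and a spinner holding the
 * engine busy, stretch the up evaluation interval out to ~infinity and
 * compare the C0 cycles accumulated in GEN6_RP_CUR_UP_EI against elapsed
 * walltime. The 8:10 ratio checks below demand they agree to within
 * roughly 20% in both directions.
 */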
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU, in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

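/*
 * On LLC platforms, dump the PCODE min-frequency table: for each GPU
 * frequency step (in 50 MHz units) the PCU reports the effective CPU
 * and ring frequencies it pairs with it, which helps diagnose
 * unexpected scaling results.
 */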
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d  %5d  %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

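/*
 * Sample the memory-backed loop counter over ~duration_ms. With dt in
 * ns, 10^6 * dc / dt is counts per millisecond, so the rate returned
 * here (and by __measure_cs_frequency below) is in KHz.
 */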
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dc = READ_ONCE(*cntr);
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

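/*
 * True if x and y agree to within the ratio f_n/f_d; e.g. f_n=2, f_d=3
 * accepts x anywhere between 2/3 and 3/2 of y.
 */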
static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
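		/* Overwriting the loop entry with MI_BATCH_BUFFER_END stops the spinner */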
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

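/*
 * Let any in-flight evaluation interval expire, clear the accumulated
 * interrupt status, and then sleep across one full, fresh EI so that
 * whatever threshold interrupt fires during it belongs to the test.
 */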
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

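/*
 * Pin the GPU at its minimum frequency and saturate the engine with a
 * spinner for a whole evaluation interval: the hardware should raise an
 * UP-threshold interrupt, while the frequency itself stays put since
 * the worker that would act on it has been stubbed out.
 */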
static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

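/*
 * The converse: pin the GPU at its maximum frequency and leave the
 * engine awake but idle for an evaluation interval, expecting a
 * DOWN-threshold (or down-timeout) interrupt with the frequency again
 * untouched.
 */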
static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

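/*
 * Sample RAPL energy over ~duration_ms. With dE in microjoules and dt
 * in ns, 10^6 * dE / dt is microjoules per millisecond, i.e. the
 * milliwatts that live_rps_power() prints.
 */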
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dE = librapl_energy_uJ();
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at a lower frequency
	 * actually saves power. Let's see if our RAPL measurements support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}