1ad8b1aafSjsg // SPDX-License-Identifier: MIT
2ad8b1aafSjsg /*
3ad8b1aafSjsg * Copyright © 2020 Intel Corporation
4ad8b1aafSjsg */
5ad8b1aafSjsg
6ad8b1aafSjsg #include <linux/pm_qos.h>
7ad8b1aafSjsg #include <linux/sort.h>
8ad8b1aafSjsg
91bb76ff1Sjsg #include "gem/i915_gem_internal.h"
101bb76ff1Sjsg
11*f005ef32Sjsg #include "i915_reg.h"
12ad8b1aafSjsg #include "intel_engine_heartbeat.h"
13ad8b1aafSjsg #include "intel_engine_pm.h"
141bb76ff1Sjsg #include "intel_engine_regs.h"
15ad8b1aafSjsg #include "intel_gpu_commands.h"
16ad8b1aafSjsg #include "intel_gt_clock_utils.h"
17ad8b1aafSjsg #include "intel_gt_pm.h"
18ad8b1aafSjsg #include "intel_rc6.h"
19ad8b1aafSjsg #include "selftest_engine_heartbeat.h"
20ad8b1aafSjsg #include "selftest_rps.h"
21ad8b1aafSjsg #include "selftests/igt_flush_test.h"
22ad8b1aafSjsg #include "selftests/igt_spinner.h"
23ad8b1aafSjsg #include "selftests/librapl.h"
24ad8b1aafSjsg
25ad8b1aafSjsg /* Try to isolate the impact of cstates from determining frequency response */
26ad8b1aafSjsg #define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
27ad8b1aafSjsg
/*
 * Stand-in for rps->work.func while a selftest runs: does nothing, so the
 * real RPS worker cannot change frequencies behind the test's back.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
}
31ad8b1aafSjsg
cmp_u64(const void * A,const void * B)32ad8b1aafSjsg static int cmp_u64(const void *A, const void *B)
33ad8b1aafSjsg {
34ad8b1aafSjsg const u64 *a = A, *b = B;
35ad8b1aafSjsg
36ad8b1aafSjsg if (*a < *b)
37ad8b1aafSjsg return -1;
38ad8b1aafSjsg else if (*a > *b)
39ad8b1aafSjsg return 1;
40ad8b1aafSjsg else
41ad8b1aafSjsg return 0;
42ad8b1aafSjsg }
43ad8b1aafSjsg
cmp_u32(const void * A,const void * B)44ad8b1aafSjsg static int cmp_u32(const void *A, const void *B)
45ad8b1aafSjsg {
46ad8b1aafSjsg const u32 *a = A, *b = B;
47ad8b1aafSjsg
48ad8b1aafSjsg if (*a < *b)
49ad8b1aafSjsg return -1;
50ad8b1aafSjsg else if (*a > *b)
51ad8b1aafSjsg return 1;
52ad8b1aafSjsg else
53ad8b1aafSjsg return 0;
54ad8b1aafSjsg }
55ad8b1aafSjsg
/*
 * Build a self-looping batch buffer on @engine that busily increments a
 * CS general-purpose register, so the CPU can observe how fast the command
 * streamer is executing (and hence infer the GPU clock).
 *
 * The batch initialises GPRs COUNT=0 and INC=1, then runs a 1024x unrolled
 * loop of MI_MATH(COUNT += INC); if @srm, each iteration also stores COUNT
 * into the last dword of the buffer so the CPU can sample it directly.
 * The loop closes with MI_BATCH_BUFFER_START back to the loop head, so it
 * spins forever until the caller overwrites *cancel (the loop entry) with
 * MI_BATCH_BUFFER_END.
 *
 * On success the returned vma is left pinned, locked and with its WC map
 * still pinned — the caller is responsible for unpin/unlock/put (see the
 * err_vma paths in live_rps_frequency_cs/srm).  *counter points at the
 * CPU-visible counter dword (NULL when !srm).
 */
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/* Index of the last dword in the object: counter storage slot */
	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;
	/*
	 * NOTE(review): a pin failure jumps to err_unlock before
	 * i915_vma_lock() has been taken — confirm unlocking an unheld
	 * vma lock is benign/intended here.
	 */

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	/* Zero both halves of every GPR we use */
	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	/* Loop entry point; also where *cancel will point */
	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			/* Publish COUNT to the last dword for the CPU */
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	/* Jump back to the loop entry; runs until *cancel is overwritten */
	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
150ad8b1aafSjsg
/*
 * Poll the actual GPU frequency (CAGF) until it reaches @freq, stops
 * changing, or @timeout_ms expires.  Uses exponential backoff between
 * samples.  Returns the last frequency read — convergence is detected
 * either by hitting @freq exactly or by the last 64 samples all being
 * identical (the PCU has settled somewhere else, e.g. when throttled).
 */
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	/* Seed history with the target so a stuck-at-target reads converge */
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}
187ad8b1aafSjsg
/*
 * Request frequency @freq under the rps lock and wait for the hardware to
 * follow.  Returns the frequency actually achieved (which may be lower if
 * the PCU throttles), or 0 if the request itself could not be applied
 * within 50ms.
 */
static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	/* Give the PCU up to 50ms to settle on the requested frequency */
	return wait_for_freq(rps, freq, 50);
}
201ad8b1aafSjsg
/*
 * Debug aid: dump the hardware P-state limit register for platforms that
 * have one, to help diagnose why a requested frequency was not honoured.
 */
static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}
218ad8b1aafSjsg
/*
 * live_rps_clock_interval - verify GT clock <-> walltime conversion
 *
 * With RPS disabled and a spinner keeping one engine busy, program the RP
 * "up" evaluation interval to effectively infinity and watch the
 * GEN6_RP_CUR_UP_EI counter tick.  The number of ticks accumulated over a
 * measured walltime must agree (within an 80%-125% window) with
 * intel_gt_pm_interval_to_ns()/intel_gt_ns_to_pm_interval(), proving that
 * the GT clock frequency the driver derived is correct.
 *
 * Returns 0 on success or when unsupported, negative error on failure.
 */
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Neutralise the RPS worker for the duration of the test */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			/* 5 samples of (EI ticks, walltime) over ~1ms each */
			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			/* Accept up to 20% mismatch in either direction */
			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}
370ad8b1aafSjsg
live_rps_control(void * arg)371ad8b1aafSjsg int live_rps_control(void *arg)
372ad8b1aafSjsg {
373ad8b1aafSjsg struct intel_gt *gt = arg;
374ad8b1aafSjsg struct intel_rps *rps = >->rps;
375ad8b1aafSjsg void (*saved_work)(struct work_struct *wrk);
376ad8b1aafSjsg struct intel_engine_cs *engine;
377ad8b1aafSjsg enum intel_engine_id id;
378ad8b1aafSjsg struct igt_spinner spin;
379ad8b1aafSjsg int err = 0;
380ad8b1aafSjsg
381ad8b1aafSjsg /*
382ad8b1aafSjsg * Check that the actual frequency matches our requested frequency,
383ad8b1aafSjsg * to verify our control mechanism. We have to be careful that the
384ad8b1aafSjsg * PCU may throttle the GPU in which case the actual frequency used
385ad8b1aafSjsg * will be lowered than requested.
386ad8b1aafSjsg */
387ad8b1aafSjsg
388ad8b1aafSjsg if (!intel_rps_is_enabled(rps))
389ad8b1aafSjsg return 0;
390ad8b1aafSjsg
391ad8b1aafSjsg if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
392ad8b1aafSjsg return 0;
393ad8b1aafSjsg
394ad8b1aafSjsg if (igt_spinner_init(&spin, gt))
395ad8b1aafSjsg return -ENOMEM;
396ad8b1aafSjsg
397ad8b1aafSjsg intel_gt_pm_wait_for_idle(gt);
398ad8b1aafSjsg saved_work = rps->work.func;
399ad8b1aafSjsg rps->work.func = dummy_rps_work;
400ad8b1aafSjsg
401ad8b1aafSjsg intel_gt_pm_get(gt);
402ad8b1aafSjsg for_each_engine(engine, gt, id) {
403ad8b1aafSjsg struct i915_request *rq;
404ad8b1aafSjsg ktime_t min_dt, max_dt;
405ad8b1aafSjsg int f, limit;
406ad8b1aafSjsg int min, max;
407ad8b1aafSjsg
408ad8b1aafSjsg if (!intel_engine_can_store_dword(engine))
409ad8b1aafSjsg continue;
410ad8b1aafSjsg
411ad8b1aafSjsg st_engine_heartbeat_disable(engine);
412ad8b1aafSjsg
413ad8b1aafSjsg rq = igt_spinner_create_request(&spin,
414ad8b1aafSjsg engine->kernel_context,
415ad8b1aafSjsg MI_NOOP);
416ad8b1aafSjsg if (IS_ERR(rq)) {
417ad8b1aafSjsg err = PTR_ERR(rq);
418ad8b1aafSjsg break;
419ad8b1aafSjsg }
420ad8b1aafSjsg
421ad8b1aafSjsg i915_request_add(rq);
422ad8b1aafSjsg
423ad8b1aafSjsg if (!igt_wait_for_spinner(&spin, rq)) {
424ad8b1aafSjsg pr_err("%s: RPS spinner did not start\n",
425ad8b1aafSjsg engine->name);
426ad8b1aafSjsg igt_spinner_end(&spin);
427ad8b1aafSjsg st_engine_heartbeat_enable(engine);
428ad8b1aafSjsg intel_gt_set_wedged(engine->gt);
429ad8b1aafSjsg err = -EIO;
430ad8b1aafSjsg break;
431ad8b1aafSjsg }
432ad8b1aafSjsg
433ad8b1aafSjsg if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
434ad8b1aafSjsg pr_err("%s: could not set minimum frequency [%x], only %x!\n",
435ad8b1aafSjsg engine->name, rps->min_freq, read_cagf(rps));
436ad8b1aafSjsg igt_spinner_end(&spin);
437ad8b1aafSjsg st_engine_heartbeat_enable(engine);
438ad8b1aafSjsg show_pstate_limits(rps);
439ad8b1aafSjsg err = -EINVAL;
440ad8b1aafSjsg break;
441ad8b1aafSjsg }
442ad8b1aafSjsg
443ad8b1aafSjsg for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
444ad8b1aafSjsg if (rps_set_check(rps, f) < f)
445ad8b1aafSjsg break;
446ad8b1aafSjsg }
447ad8b1aafSjsg
448ad8b1aafSjsg limit = rps_set_check(rps, f);
449ad8b1aafSjsg
450ad8b1aafSjsg if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
451ad8b1aafSjsg pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
452ad8b1aafSjsg engine->name, rps->min_freq, read_cagf(rps));
453ad8b1aafSjsg igt_spinner_end(&spin);
454ad8b1aafSjsg st_engine_heartbeat_enable(engine);
455ad8b1aafSjsg show_pstate_limits(rps);
456ad8b1aafSjsg err = -EINVAL;
457ad8b1aafSjsg break;
458ad8b1aafSjsg }
459ad8b1aafSjsg
460ad8b1aafSjsg max_dt = ktime_get();
461ad8b1aafSjsg max = rps_set_check(rps, limit);
462ad8b1aafSjsg max_dt = ktime_sub(ktime_get(), max_dt);
463ad8b1aafSjsg
464ad8b1aafSjsg min_dt = ktime_get();
465ad8b1aafSjsg min = rps_set_check(rps, rps->min_freq);
466ad8b1aafSjsg min_dt = ktime_sub(ktime_get(), min_dt);
467ad8b1aafSjsg
468ad8b1aafSjsg igt_spinner_end(&spin);
469ad8b1aafSjsg st_engine_heartbeat_enable(engine);
470ad8b1aafSjsg
471ad8b1aafSjsg pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
472ad8b1aafSjsg engine->name,
473ad8b1aafSjsg rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
474ad8b1aafSjsg rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
475ad8b1aafSjsg limit, intel_gpu_freq(rps, limit),
476ad8b1aafSjsg min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
477ad8b1aafSjsg
478ad8b1aafSjsg if (limit == rps->min_freq) {
479ad8b1aafSjsg pr_err("%s: GPU throttled to minimum!\n",
480ad8b1aafSjsg engine->name);
481ad8b1aafSjsg show_pstate_limits(rps);
482ad8b1aafSjsg err = -ENODEV;
483ad8b1aafSjsg break;
484ad8b1aafSjsg }
485ad8b1aafSjsg
486ad8b1aafSjsg if (igt_flush_test(gt->i915)) {
487ad8b1aafSjsg err = -EIO;
488ad8b1aafSjsg break;
489ad8b1aafSjsg }
490ad8b1aafSjsg }
491ad8b1aafSjsg intel_gt_pm_put(gt);
492ad8b1aafSjsg
493ad8b1aafSjsg igt_spinner_fini(&spin);
494ad8b1aafSjsg
495ad8b1aafSjsg intel_gt_pm_wait_for_idle(gt);
496ad8b1aafSjsg rps->work.func = saved_work;
497ad8b1aafSjsg
498ad8b1aafSjsg return err;
499ad8b1aafSjsg }
500ad8b1aafSjsg
/*
 * Debug aid: on LLC platforms, read the pcode min-frequency table and
 * print the effective CPU/ring frequencies paired with each GPU
 * frequency, to help explain unexpected scaling results.
 */
static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 HZ units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		/* ia_freq: bits 7:0 eCPU, bits 15:8 eRing, in 100MHz units */
		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}
536ad8b1aafSjsg
/*
 * Sample the CPU-visible batch counter *cntr over ~duration_ms and return
 * its mean rate in increments per millisecond (the "KHz" reported by the
 * callers), computed as 1e6 * delta-count / delta-ns.
 */
static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dc = READ_ONCE(*cntr);
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}
549ad8b1aafSjsg
/*
 * Set the GPU to *freq and measure the memory-visible counter rate there.
 * On return *freq is updated to the average of the achieved frequency and
 * a fresh CAGF read (the two can drift while we sample).  The five samples
 * are combined with a triangle filter over the sorted values for
 * stability against outliers.
 */
static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
564ad8b1aafSjsg
/*
 * Like __measure_frequency(), but sample the spinner's counter directly
 * from CS_GPR(0) via mmio instead of from memory.  Relies on the CS_GPR()
 * macro defined in create_spin_counter() (deliberately left #defined).
 */
static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}
578ad8b1aafSjsg
/*
 * Set the GPU to *freq and measure the CS GPR counter rate there; the
 * mmio-sampling counterpart of measure_frequency_at().  On return *freq
 * is updated to the average of the achieved frequency and a fresh CAGF
 * read.  Five samples, combined with a triangle filter for stability.
 */
static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
595ad8b1aafSjsg
scaled_within(u64 x,u64 y,u32 f_n,u32 f_d)596ad8b1aafSjsg static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
597ad8b1aafSjsg {
598ad8b1aafSjsg return f_d * x > f_n * y && f_n * x < f_d * y;
599ad8b1aafSjsg }
600ad8b1aafSjsg
/*
 * live_rps_frequency_cs - verify CS execution rate scales with frequency
 *
 * Launch a self-spinning batch (create_spin_counter, srm=false) and sample
 * its counter via the CS GPR at the minimum and maximum RPS frequencies.
 * The ratio of counter rates must track the ratio of frequencies to within
 * 2:3; on failure, sweep the whole frequency range and log the per-step
 * scaling before reporting -EINTR.
 */
int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	/* Keep CPU cstates from perturbing the timing measurements */
	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	/* Neutralise the RPS worker for the duration of the test */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/*
		 * NOTE(review): on this timeout err remains 0, so the
		 * engine is silently skipped rather than failed — confirm
		 * that is the intent.
		 */
		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Sweep the range, logging scaling at each step */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		/* Terminate the infinite loop, then release the batch */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
739ad8b1aafSjsg
/*
 * live_rps_frequency_srm - verify CS execution rate scales with frequency
 *
 * Same as live_rps_frequency_cs, but the spinner publishes its counter to
 * memory via SRM (create_spin_counter, srm=true) and the CPU samples that
 * memory location instead of reading the CS GPR over mmio.  The acceptance
 * ratio here is 1:2.
 */
int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	/* Keep CPU cstates from perturbing the timing measurements */
	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	/* Neutralise the RPS worker for the duration of the test */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/*
		 * NOTE(review): on this timeout err remains 0, so the
		 * engine is silently skipped rather than failed — confirm
		 * that is the intent.
		 */
		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Sweep the range, logging scaling at each step */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		/* Terminate the infinite loop, then release the batch */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	/* Restore the real RPS worker */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
877ad8b1aafSjsg
/*
 * sleep_for_ei - sleep across one fresh RPS evaluation interval (EI)
 * @rps: the RPS state whose PM interrupt status is sampled by the caller
 * @timeout_us: length of one hardware evaluation interval, in microseconds
 *
 * The RPS up/down interrupts are generated at the end of an evaluation
 * interval.  To sample exactly one interval, first let any interval that is
 * already in flight expire, then clear the accumulated interrupt state, and
 * finally sleep long enough to guarantee at least one complete EI elapses.
 * On return, rps->pm_iir reflects only events raised during that interval.
 */
static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir); /* disabling must have drained all queued events */
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}
891ad8b1aafSjsg
__rps_up_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine,struct igt_spinner * spin)892ad8b1aafSjsg static int __rps_up_interrupt(struct intel_rps *rps,
893ad8b1aafSjsg struct intel_engine_cs *engine,
894ad8b1aafSjsg struct igt_spinner *spin)
895ad8b1aafSjsg {
896ad8b1aafSjsg struct intel_uncore *uncore = engine->uncore;
897ad8b1aafSjsg struct i915_request *rq;
898ad8b1aafSjsg u32 timeout;
899ad8b1aafSjsg
900ad8b1aafSjsg if (!intel_engine_can_store_dword(engine))
901ad8b1aafSjsg return 0;
902ad8b1aafSjsg
903ad8b1aafSjsg rps_set_check(rps, rps->min_freq);
904ad8b1aafSjsg
905ad8b1aafSjsg rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
906ad8b1aafSjsg if (IS_ERR(rq))
907ad8b1aafSjsg return PTR_ERR(rq);
908ad8b1aafSjsg
909ad8b1aafSjsg i915_request_get(rq);
910ad8b1aafSjsg i915_request_add(rq);
911ad8b1aafSjsg
912ad8b1aafSjsg if (!igt_wait_for_spinner(spin, rq)) {
913ad8b1aafSjsg pr_err("%s: RPS spinner did not start\n",
914ad8b1aafSjsg engine->name);
915ad8b1aafSjsg i915_request_put(rq);
916ad8b1aafSjsg intel_gt_set_wedged(engine->gt);
917ad8b1aafSjsg return -EIO;
918ad8b1aafSjsg }
919ad8b1aafSjsg
920ad8b1aafSjsg if (!intel_rps_is_active(rps)) {
921ad8b1aafSjsg pr_err("%s: RPS not enabled on starting spinner\n",
922ad8b1aafSjsg engine->name);
923ad8b1aafSjsg igt_spinner_end(spin);
924ad8b1aafSjsg i915_request_put(rq);
925ad8b1aafSjsg return -EINVAL;
926ad8b1aafSjsg }
927ad8b1aafSjsg
928ad8b1aafSjsg if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
929ad8b1aafSjsg pr_err("%s: RPS did not register UP interrupt\n",
930ad8b1aafSjsg engine->name);
931ad8b1aafSjsg i915_request_put(rq);
932ad8b1aafSjsg return -EINVAL;
933ad8b1aafSjsg }
934ad8b1aafSjsg
935ad8b1aafSjsg if (rps->last_freq != rps->min_freq) {
936ad8b1aafSjsg pr_err("%s: RPS did not program min frequency\n",
937ad8b1aafSjsg engine->name);
938ad8b1aafSjsg i915_request_put(rq);
939ad8b1aafSjsg return -EINVAL;
940ad8b1aafSjsg }
941ad8b1aafSjsg
942ad8b1aafSjsg timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
943ad8b1aafSjsg timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
944ad8b1aafSjsg timeout = DIV_ROUND_UP(timeout, 1000);
945ad8b1aafSjsg
946ad8b1aafSjsg sleep_for_ei(rps, timeout);
947ad8b1aafSjsg GEM_BUG_ON(i915_request_completed(rq));
948ad8b1aafSjsg
949ad8b1aafSjsg igt_spinner_end(spin);
950ad8b1aafSjsg i915_request_put(rq);
951ad8b1aafSjsg
952ad8b1aafSjsg if (rps->cur_freq != rps->min_freq) {
953ad8b1aafSjsg pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
954ad8b1aafSjsg engine->name, intel_rps_read_actual_frequency(rps));
955ad8b1aafSjsg return -EINVAL;
956ad8b1aafSjsg }
957ad8b1aafSjsg
958ad8b1aafSjsg if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
959ad8b1aafSjsg pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
960ad8b1aafSjsg engine->name, rps->pm_iir,
961ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_PREV_UP),
962ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
963ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_UP_EI));
964ad8b1aafSjsg return -EINVAL;
965ad8b1aafSjsg }
966ad8b1aafSjsg
967ad8b1aafSjsg return 0;
968ad8b1aafSjsg }
969ad8b1aafSjsg
__rps_down_interrupt(struct intel_rps * rps,struct intel_engine_cs * engine)970ad8b1aafSjsg static int __rps_down_interrupt(struct intel_rps *rps,
971ad8b1aafSjsg struct intel_engine_cs *engine)
972ad8b1aafSjsg {
973ad8b1aafSjsg struct intel_uncore *uncore = engine->uncore;
974ad8b1aafSjsg u32 timeout;
975ad8b1aafSjsg
976ad8b1aafSjsg rps_set_check(rps, rps->max_freq);
977ad8b1aafSjsg
978ad8b1aafSjsg if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
979ad8b1aafSjsg pr_err("%s: RPS did not register DOWN interrupt\n",
980ad8b1aafSjsg engine->name);
981ad8b1aafSjsg return -EINVAL;
982ad8b1aafSjsg }
983ad8b1aafSjsg
984ad8b1aafSjsg if (rps->last_freq != rps->max_freq) {
985ad8b1aafSjsg pr_err("%s: RPS did not program max frequency\n",
986ad8b1aafSjsg engine->name);
987ad8b1aafSjsg return -EINVAL;
988ad8b1aafSjsg }
989ad8b1aafSjsg
990ad8b1aafSjsg timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
991ad8b1aafSjsg timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
992ad8b1aafSjsg timeout = DIV_ROUND_UP(timeout, 1000);
993ad8b1aafSjsg
994ad8b1aafSjsg sleep_for_ei(rps, timeout);
995ad8b1aafSjsg
996ad8b1aafSjsg if (rps->cur_freq != rps->max_freq) {
997ad8b1aafSjsg pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
998ad8b1aafSjsg engine->name,
999ad8b1aafSjsg intel_rps_read_actual_frequency(rps));
1000ad8b1aafSjsg return -EINVAL;
1001ad8b1aafSjsg }
1002ad8b1aafSjsg
1003ad8b1aafSjsg if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1004ad8b1aafSjsg pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1005ad8b1aafSjsg engine->name, rps->pm_iir,
1006ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1007ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1008ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1009ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1010ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1011ad8b1aafSjsg intel_uncore_read(uncore, GEN6_RP_UP_EI));
1012ad8b1aafSjsg return -EINVAL;
1013ad8b1aafSjsg }
1014ad8b1aafSjsg
1015ad8b1aafSjsg return 0;
1016ad8b1aafSjsg }
1017ad8b1aafSjsg
/*
 * live_rps_interrupt - check that the HW raises RPS PM interrupts on load
 *
 * For each engine: drive it with a spinner at min frequency and expect the
 * UP-threshold interrupt; then hold it awake but idle at max frequency (rc6
 * disabled so the GT stays on) and expect the DOWN-threshold interrupt.
 * The RPS worker is replaced with a no-op for the duration so that observed
 * interrupts are not consumed by real reclocking.
 *
 * Returns 0 on success (or if RPS interrupts are unavailable), or a
 * negative error code on the first failure.
 */
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	/* rps->pm_events is only programmed while the GT is awake */
	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Neuter the RPS worker so interrupts cannot trigger real reclocks */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			/* Start from a parked GT so RPS activation is observed */
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			/* Hold the GT out of rc6 so it idles at frequency */
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	/* Restore the real RPS worker once everything is idle again */
	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
1093ad8b1aafSjsg
__measure_power(int duration_ms)1094ad8b1aafSjsg static u64 __measure_power(int duration_ms)
1095ad8b1aafSjsg {
1096ad8b1aafSjsg u64 dE, dt;
1097ad8b1aafSjsg
1098ad8b1aafSjsg dE = librapl_energy_uJ();
1099*f005ef32Sjsg dt = ktime_get();
1100ad8b1aafSjsg usleep_range(1000 * duration_ms, 2000 * duration_ms);
1101ad8b1aafSjsg dE = librapl_energy_uJ() - dE;
1102ad8b1aafSjsg dt = ktime_get() - dt;
1103ad8b1aafSjsg
1104ad8b1aafSjsg return div64_u64(1000 * 1000 * dE, dt);
1105ad8b1aafSjsg }
1106ad8b1aafSjsg
measure_power(struct intel_rps * rps,int * freq)1107*f005ef32Sjsg static u64 measure_power(struct intel_rps *rps, int *freq)
1108ad8b1aafSjsg {
1109ad8b1aafSjsg u64 x[5];
1110ad8b1aafSjsg int i;
1111ad8b1aafSjsg
1112ad8b1aafSjsg for (i = 0; i < 5; i++)
1113ad8b1aafSjsg x[i] = __measure_power(5);
1114*f005ef32Sjsg
1115*f005ef32Sjsg *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
1116ad8b1aafSjsg
1117ad8b1aafSjsg /* A simple triangle filter for better result stability */
1118ad8b1aafSjsg sort(x, 5, sizeof(*x), cmp_u64, NULL);
1119ad8b1aafSjsg return div_u64(x[1] + 2 * x[2] + x[3], 4);
1120ad8b1aafSjsg }
1121ad8b1aafSjsg
measure_power_at(struct intel_rps * rps,int * freq)1122*f005ef32Sjsg static u64 measure_power_at(struct intel_rps *rps, int *freq)
1123*f005ef32Sjsg {
1124*f005ef32Sjsg *freq = rps_set_check(rps, *freq);
1125*f005ef32Sjsg return measure_power(rps, freq);
1126*f005ef32Sjsg }
1127*f005ef32Sjsg
live_rps_power(void * arg)1128ad8b1aafSjsg int live_rps_power(void *arg)
1129ad8b1aafSjsg {
1130ad8b1aafSjsg struct intel_gt *gt = arg;
1131ad8b1aafSjsg struct intel_rps *rps = >->rps;
1132ad8b1aafSjsg void (*saved_work)(struct work_struct *wrk);
1133ad8b1aafSjsg struct intel_engine_cs *engine;
1134ad8b1aafSjsg enum intel_engine_id id;
1135ad8b1aafSjsg struct igt_spinner spin;
1136ad8b1aafSjsg int err = 0;
1137ad8b1aafSjsg
1138ad8b1aafSjsg /*
1139ad8b1aafSjsg * Our fundamental assumption is that running at lower frequency
1140ad8b1aafSjsg * actually saves power. Let's see if our RAPL measurement support
1141ad8b1aafSjsg * that theory.
1142ad8b1aafSjsg */
1143ad8b1aafSjsg
11445ca02815Sjsg if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1145ad8b1aafSjsg return 0;
1146ad8b1aafSjsg
11475ca02815Sjsg if (!librapl_supported(gt->i915))
1148ad8b1aafSjsg return 0;
1149ad8b1aafSjsg
1150ad8b1aafSjsg if (igt_spinner_init(&spin, gt))
1151ad8b1aafSjsg return -ENOMEM;
1152ad8b1aafSjsg
1153ad8b1aafSjsg intel_gt_pm_wait_for_idle(gt);
1154ad8b1aafSjsg saved_work = rps->work.func;
1155ad8b1aafSjsg rps->work.func = dummy_rps_work;
1156ad8b1aafSjsg
1157ad8b1aafSjsg for_each_engine(engine, gt, id) {
1158ad8b1aafSjsg struct i915_request *rq;
1159ad8b1aafSjsg struct {
1160ad8b1aafSjsg u64 power;
1161ad8b1aafSjsg int freq;
1162ad8b1aafSjsg } min, max;
1163ad8b1aafSjsg
1164ad8b1aafSjsg if (!intel_engine_can_store_dword(engine))
1165ad8b1aafSjsg continue;
1166ad8b1aafSjsg
1167ad8b1aafSjsg st_engine_heartbeat_disable(engine);
1168ad8b1aafSjsg
1169ad8b1aafSjsg rq = igt_spinner_create_request(&spin,
1170ad8b1aafSjsg engine->kernel_context,
1171ad8b1aafSjsg MI_NOOP);
1172ad8b1aafSjsg if (IS_ERR(rq)) {
1173ad8b1aafSjsg st_engine_heartbeat_enable(engine);
1174ad8b1aafSjsg err = PTR_ERR(rq);
1175ad8b1aafSjsg break;
1176ad8b1aafSjsg }
1177ad8b1aafSjsg
1178ad8b1aafSjsg i915_request_add(rq);
1179ad8b1aafSjsg
1180ad8b1aafSjsg if (!igt_wait_for_spinner(&spin, rq)) {
1181ad8b1aafSjsg pr_err("%s: RPS spinner did not start\n",
1182ad8b1aafSjsg engine->name);
1183ad8b1aafSjsg igt_spinner_end(&spin);
1184ad8b1aafSjsg st_engine_heartbeat_enable(engine);
1185ad8b1aafSjsg intel_gt_set_wedged(engine->gt);
1186ad8b1aafSjsg err = -EIO;
1187ad8b1aafSjsg break;
1188ad8b1aafSjsg }
1189ad8b1aafSjsg
1190ad8b1aafSjsg max.freq = rps->max_freq;
1191ad8b1aafSjsg max.power = measure_power_at(rps, &max.freq);
1192ad8b1aafSjsg
1193ad8b1aafSjsg min.freq = rps->min_freq;
1194ad8b1aafSjsg min.power = measure_power_at(rps, &min.freq);
1195ad8b1aafSjsg
1196ad8b1aafSjsg igt_spinner_end(&spin);
1197ad8b1aafSjsg st_engine_heartbeat_enable(engine);
1198ad8b1aafSjsg
1199ad8b1aafSjsg pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
1200ad8b1aafSjsg engine->name,
1201ad8b1aafSjsg min.power, intel_gpu_freq(rps, min.freq),
1202ad8b1aafSjsg max.power, intel_gpu_freq(rps, max.freq));
1203ad8b1aafSjsg
1204ad8b1aafSjsg if (10 * min.freq >= 9 * max.freq) {
1205ad8b1aafSjsg pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
1206ad8b1aafSjsg min.freq, intel_gpu_freq(rps, min.freq),
1207ad8b1aafSjsg max.freq, intel_gpu_freq(rps, max.freq));
1208ad8b1aafSjsg continue;
1209ad8b1aafSjsg }
1210ad8b1aafSjsg
1211ad8b1aafSjsg if (11 * min.power > 10 * max.power) {
1212ad8b1aafSjsg pr_err("%s: did not conserve power when setting lower frequency!\n",
1213ad8b1aafSjsg engine->name);
1214ad8b1aafSjsg err = -EINVAL;
1215ad8b1aafSjsg break;
1216ad8b1aafSjsg }
1217ad8b1aafSjsg
1218ad8b1aafSjsg if (igt_flush_test(gt->i915)) {
1219ad8b1aafSjsg err = -EIO;
1220ad8b1aafSjsg break;
1221ad8b1aafSjsg }
1222ad8b1aafSjsg }
1223ad8b1aafSjsg
1224ad8b1aafSjsg igt_spinner_fini(&spin);
1225ad8b1aafSjsg
1226ad8b1aafSjsg intel_gt_pm_wait_for_idle(gt);
1227ad8b1aafSjsg rps->work.func = saved_work;
1228ad8b1aafSjsg
1229ad8b1aafSjsg return err;
1230ad8b1aafSjsg }
1231ad8b1aafSjsg
live_rps_dynamic(void * arg)1232ad8b1aafSjsg int live_rps_dynamic(void *arg)
1233ad8b1aafSjsg {
1234ad8b1aafSjsg struct intel_gt *gt = arg;
1235ad8b1aafSjsg struct intel_rps *rps = >->rps;
1236ad8b1aafSjsg struct intel_engine_cs *engine;
1237ad8b1aafSjsg enum intel_engine_id id;
1238ad8b1aafSjsg struct igt_spinner spin;
1239ad8b1aafSjsg int err = 0;
1240ad8b1aafSjsg
1241ad8b1aafSjsg /*
1242ad8b1aafSjsg * We've looked at the bascs, and have established that we
1243ad8b1aafSjsg * can change the clock frequency and that the HW will generate
1244ad8b1aafSjsg * interrupts based on load. Now we check how we integrate those
1245ad8b1aafSjsg * moving parts into dynamic reclocking based on load.
1246ad8b1aafSjsg */
1247ad8b1aafSjsg
12485ca02815Sjsg if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
1249ad8b1aafSjsg return 0;
1250ad8b1aafSjsg
1251ad8b1aafSjsg if (igt_spinner_init(&spin, gt))
1252ad8b1aafSjsg return -ENOMEM;
1253ad8b1aafSjsg
1254ad8b1aafSjsg if (intel_rps_has_interrupts(rps))
1255ad8b1aafSjsg pr_info("RPS has interrupt support\n");
1256ad8b1aafSjsg if (intel_rps_uses_timer(rps))
1257ad8b1aafSjsg pr_info("RPS has timer support\n");
1258ad8b1aafSjsg
1259ad8b1aafSjsg for_each_engine(engine, gt, id) {
1260ad8b1aafSjsg struct i915_request *rq;
1261ad8b1aafSjsg struct {
1262ad8b1aafSjsg ktime_t dt;
1263ad8b1aafSjsg u8 freq;
1264ad8b1aafSjsg } min, max;
1265ad8b1aafSjsg
1266ad8b1aafSjsg if (!intel_engine_can_store_dword(engine))
1267ad8b1aafSjsg continue;
1268ad8b1aafSjsg
1269ad8b1aafSjsg intel_gt_pm_wait_for_idle(gt);
1270ad8b1aafSjsg GEM_BUG_ON(intel_rps_is_active(rps));
1271ad8b1aafSjsg rps->cur_freq = rps->min_freq;
1272ad8b1aafSjsg
1273ad8b1aafSjsg intel_engine_pm_get(engine);
1274ad8b1aafSjsg intel_rc6_disable(>->rc6);
1275ad8b1aafSjsg GEM_BUG_ON(rps->last_freq != rps->min_freq);
1276ad8b1aafSjsg
1277ad8b1aafSjsg rq = igt_spinner_create_request(&spin,
1278ad8b1aafSjsg engine->kernel_context,
1279ad8b1aafSjsg MI_NOOP);
1280ad8b1aafSjsg if (IS_ERR(rq)) {
1281ad8b1aafSjsg err = PTR_ERR(rq);
1282ad8b1aafSjsg goto err;
1283ad8b1aafSjsg }
1284ad8b1aafSjsg
1285ad8b1aafSjsg i915_request_add(rq);
1286ad8b1aafSjsg
1287ad8b1aafSjsg max.dt = ktime_get();
1288ad8b1aafSjsg max.freq = wait_for_freq(rps, rps->max_freq, 500);
1289ad8b1aafSjsg max.dt = ktime_sub(ktime_get(), max.dt);
1290ad8b1aafSjsg
1291ad8b1aafSjsg igt_spinner_end(&spin);
1292ad8b1aafSjsg
1293ad8b1aafSjsg min.dt = ktime_get();
1294ad8b1aafSjsg min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1295ad8b1aafSjsg min.dt = ktime_sub(ktime_get(), min.dt);
1296ad8b1aafSjsg
1297ad8b1aafSjsg pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1298ad8b1aafSjsg engine->name,
1299ad8b1aafSjsg max.freq, intel_gpu_freq(rps, max.freq),
1300ad8b1aafSjsg ktime_to_ns(max.dt),
1301ad8b1aafSjsg min.freq, intel_gpu_freq(rps, min.freq),
1302ad8b1aafSjsg ktime_to_ns(min.dt));
1303ad8b1aafSjsg if (min.freq >= max.freq) {
1304ad8b1aafSjsg pr_err("%s: dynamic reclocking of spinner failed\n!",
1305ad8b1aafSjsg engine->name);
1306ad8b1aafSjsg err = -EINVAL;
1307ad8b1aafSjsg }
1308ad8b1aafSjsg
1309ad8b1aafSjsg err:
1310ad8b1aafSjsg intel_rc6_enable(>->rc6);
1311ad8b1aafSjsg intel_engine_pm_put(engine);
1312ad8b1aafSjsg
1313ad8b1aafSjsg if (igt_flush_test(gt->i915))
1314ad8b1aafSjsg err = -EIO;
1315ad8b1aafSjsg if (err)
1316ad8b1aafSjsg break;
1317ad8b1aafSjsg }
1318ad8b1aafSjsg
1319ad8b1aafSjsg igt_spinner_fini(&spin);
1320ad8b1aafSjsg
1321ad8b1aafSjsg return err;
1322ad8b1aafSjsg }
1323