xref: /openbsd-src/sys/dev/pci/drm/i915/gt/intel_rps.c (revision c1a45aed656e7d5627c30c92421893a76f370ccb)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include <drm/i915_drm.h>
7 
8 #include "i915_drv.h"
9 #include "intel_breadcrumbs.h"
10 #include "intel_gt.h"
11 #include "intel_gt_clock_utils.h"
12 #include "intel_gt_irq.h"
13 #include "intel_gt_pm_irq.h"
14 #include "intel_rps.h"
15 #include "intel_sideband.h"
16 #ifdef __linux__
17 #include "../../../platform/x86/intel_ips.h"
18 #endif
19 
20 #define BUSY_MAX_EI	20u /* ms */
21 
22 /*
23  * Lock protecting IPS-related data structures
24  */
25 static DEFINE_SPINLOCK(mchdev_lock);
26 
27 static struct intel_gt *rps_to_gt(struct intel_rps *rps)
28 {
29 	return container_of(rps, struct intel_gt, rps);
30 }
31 
32 static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
33 {
34 	return rps_to_gt(rps)->i915;
35 }
36 
37 static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
38 {
39 	return rps_to_gt(rps)->uncore;
40 }
41 
42 static struct intel_guc_slpc *rps_to_slpc(struct intel_rps *rps)
43 {
44 	struct intel_gt *gt = rps_to_gt(rps);
45 
46 	return &gt->uc.guc.slpc;
47 }
48 
49 static bool rps_uses_slpc(struct intel_rps *rps)
50 {
51 	struct intel_gt *gt = rps_to_gt(rps);
52 
53 	return intel_uc_uses_guc_slpc(&gt->uc);
54 }
55 
56 static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
57 {
58 	return mask & ~rps->pm_intrmsk_mbz;
59 }
60 
61 static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
62 {
63 	intel_uncore_write_fw(uncore, reg, val);
64 }
65 
66 static void rps_timer(void *arg)
67 {
68 	struct intel_rps *rps = arg;
69 	struct intel_engine_cs *engine;
70 	ktime_t dt, last, timestamp;
71 	enum intel_engine_id id;
72 	s64 max_busy[3] = {};
73 
74 	timestamp = 0;
75 	for_each_engine(engine, rps_to_gt(rps), id) {
76 		s64 busy;
77 		int i;
78 
79 		dt = intel_engine_get_busy_time(engine, &timestamp);
80 		last = engine->stats.rps;
81 		engine->stats.rps = dt;
82 
83 		busy = ktime_to_ns(ktime_sub(dt, last));
84 		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
85 			if (busy > max_busy[i])
86 				swap(busy, max_busy[i]);
87 		}
88 	}
89 	last = rps->pm_timestamp;
90 	rps->pm_timestamp = timestamp;
91 
92 	if (intel_rps_is_active(rps)) {
93 		s64 busy;
94 		int i;
95 
96 		dt = ktime_sub(timestamp, last);
97 
98 		/*
99 		 * Our goal is to evaluate each engine independently, so we run
100 		 * at the lowest clocks required to sustain the heaviest
101 		 * workload. However, a task may be split into sequential
102 		 * dependent operations across a set of engines, such that
103 		 * the independent contributions do not account for high load,
104 		 * but overall the task is GPU bound. For example, consider
105 		 * video decode on vcs followed by colour post-processing
106 		 * on vecs, followed by general post-processing on rcs.
107 		 * Since multiple engines being active does not necessarily
108 		 * imply a single continuous workload across all engines, we
109 		 * hedge our bets by only contributing a factor of the
110 		 * distributed load into our busyness calculation.
111 		 */
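		/*
		 * E.g. (illustrative numbers only): with per-engine busy
		 * times of 8ms, 4ms and 2ms over the interval, the weighted
		 * sum below yields busy = 8 + 4/2 + 2/4 = 11ms.
		 */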
112 		busy = max_busy[0];
113 		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
114 			if (!max_busy[i])
115 				break;
116 
117 			busy += div_u64(max_busy[i], 1 << i);
118 		}
119 		GT_TRACE(rps_to_gt(rps),
120 			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
121 			 busy, (int)div64_u64(100 * busy, dt),
122 			 max_busy[0], max_busy[1], max_busy[2],
123 			 rps->pm_interval);
124 
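		/*
		 * Compare utilisation against the percentage thresholds
		 * without dividing: busy/dt > threshold/100 is rewritten as
		 * 100 * busy > threshold * dt.
		 */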
125 		if (100 * busy > rps->power.up_threshold * dt &&
126 		    rps->cur_freq < rps->max_freq_softlimit) {
127 			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
128 			rps->pm_interval = 1;
129 			schedule_work(&rps->work);
130 		} else if (100 * busy < rps->power.down_threshold * dt &&
131 			   rps->cur_freq > rps->min_freq_softlimit) {
132 			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
133 			rps->pm_interval = 1;
134 			schedule_work(&rps->work);
135 		} else {
136 			rps->last_adj = 0;
137 		}
138 
139 		mod_timer(&rps->timer,
140 			  jiffies + msecs_to_jiffies(rps->pm_interval));
141 		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
142 	}
143 }
144 
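/*
 * pm_timestamp bookkeeping: rps_stop_timer() stores (now - timestamp) while
 * parked, and the matching ktime_sub() on restart rebases the timestamp so
 * that the parked interval is excluded from the next busyness evaluation.
 * (This is how the symmetric ktime_sub() usage appears intended to work.)
 */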
145 static void rps_start_timer(struct intel_rps *rps)
146 {
147 	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
148 	rps->pm_interval = 1;
149 	mod_timer(&rps->timer, jiffies + 1);
150 }
151 
152 static void rps_stop_timer(struct intel_rps *rps)
153 {
154 	del_timer_sync(&rps->timer);
155 	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
156 	cancel_work_sync(&rps->work);
157 }
158 
159 static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
160 {
161 	u32 mask = 0;
162 
163 	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
164 	if (val > rps->min_freq_softlimit)
165 		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
166 			 GEN6_PM_RP_DOWN_THRESHOLD |
167 			 GEN6_PM_RP_DOWN_TIMEOUT);
168 
169 	if (val < rps->max_freq_softlimit)
170 		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
171 
172 	mask &= rps->pm_events;
173 
174 	return rps_pm_sanitize_mask(rps, ~mask);
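	/*
	 * GEN6_PMINTRMSK holds the set of *masked* (disabled) interrupts,
	 * so invert the wanted events before handing them to the register.
	 */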
175 }
176 
177 static void rps_reset_ei(struct intel_rps *rps)
178 {
179 	memset(&rps->ei, 0, sizeof(rps->ei));
180 }
181 
182 static void rps_enable_interrupts(struct intel_rps *rps)
183 {
184 	struct intel_gt *gt = rps_to_gt(rps);
185 
186 	GEM_BUG_ON(rps_uses_slpc(rps));
187 
188 	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
189 		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));
190 
191 	rps_reset_ei(rps);
192 
193 	spin_lock_irq(&gt->irq_lock);
194 	gen6_gt_pm_enable_irq(gt, rps->pm_events);
195 	spin_unlock_irq(&gt->irq_lock);
196 
197 	intel_uncore_write(gt->uncore,
198 			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
199 }
200 
201 static void gen6_rps_reset_interrupts(struct intel_rps *rps)
202 {
203 	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
204 }
205 
206 static void gen11_rps_reset_interrupts(struct intel_rps *rps)
207 {
208 	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
209 		;
210 }
211 
212 static void rps_reset_interrupts(struct intel_rps *rps)
213 {
214 	struct intel_gt *gt = rps_to_gt(rps);
215 
216 	spin_lock_irq(&gt->irq_lock);
217 	if (GRAPHICS_VER(gt->i915) >= 11)
218 		gen11_rps_reset_interrupts(rps);
219 	else
220 		gen6_rps_reset_interrupts(rps);
221 
222 	rps->pm_iir = 0;
223 	spin_unlock_irq(&gt->irq_lock);
224 }
225 
226 static void rps_disable_interrupts(struct intel_rps *rps)
227 {
228 	struct intel_gt *gt = rps_to_gt(rps);
229 
230 	intel_uncore_write(gt->uncore,
231 			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
232 
233 	spin_lock_irq(&gt->irq_lock);
234 	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
235 	spin_unlock_irq(&gt->irq_lock);
236 
237 	intel_synchronize_irq(gt->i915);
238 
239 	/*
240 	 * Now that we will not be generating any more work, flush any
241 	 * outstanding tasks. As we are called on the RPS idle path,
242 	 * we will reset the GPU to minimum frequencies, so the current
243 	 * state of the worker can be discarded.
244 	 */
245 	cancel_work_sync(&rps->work);
246 
247 	rps_reset_interrupts(rps);
248 	GT_TRACE(gt, "interrupts:off\n");
249 }
250 
251 static const struct cparams {
252 	u16 i;
253 	u16 t;
254 	u16 m;
255 	u16 c;
256 } cparams[] = {
257 	{ 1, 1333, 301, 28664 },
258 	{ 1, 1066, 294, 24460 },
259 	{ 1, 800, 294, 25192 },
260 	{ 0, 1333, 276, 27605 },
261 	{ 0, 1066, 276, 27605 },
262 	{ 0, 800, 231, 23784 },
263 };
264 
265 static void gen5_rps_init(struct intel_rps *rps)
266 {
267 	struct drm_i915_private *i915 = rps_to_i915(rps);
268 	struct intel_uncore *uncore = rps_to_uncore(rps);
269 	u8 fmax, fmin, fstart;
270 	u32 rgvmodectl;
271 	int c_m, i;
272 
273 	if (i915->fsb_freq <= 3200)
274 		c_m = 0;
275 	else if (i915->fsb_freq <= 4800)
276 		c_m = 1;
277 	else
278 		c_m = 2;
279 
280 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
281 		if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
282 			rps->ips.m = cparams[i].m;
283 			rps->ips.c = cparams[i].c;
284 			break;
285 		}
286 	}
287 
288 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
289 
290 	/* Set up min, max, and cur for interrupt handling */
291 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
292 	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
293 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
294 		MEMMODE_FSTART_SHIFT;
295 	drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n",
296 		fmax, fmin, fstart);
297 
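	/*
	 * Note the apparent inversion: the MEMMODE fields are ips delay
	 * values, which run in the opposite sense to the RPS convention
	 * (larger RPS value == faster), so FMAX maps to min_freq and FMIN
	 * to max_freq; gen5_invert_freq() converts back when programming
	 * the hardware.
	 */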
298 	rps->min_freq = fmax;
299 	rps->efficient_freq = fstart;
300 	rps->max_freq = fmin;
301 }
302 
303 static unsigned long
304 __ips_chipset_val(struct intel_ips *ips)
305 {
306 	struct intel_uncore *uncore =
307 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
308 	unsigned long now = jiffies_to_msecs(jiffies), dt;
309 	unsigned long result;
310 	u64 total, delta;
311 
312 	lockdep_assert_held(&mchdev_lock);
313 
314 	/*
315 	 * Prevent division-by-zero if we are asking too fast.
316 	 * Also, we don't get interesting results if we are polling
317 	 * faster than once in 10ms, so just return the saved value
318 	 * in such cases.
319 	 */
320 	dt = now - ips->last_time1;
321 	if (dt <= 10)
322 		return ips->chipset_power;
323 
324 	/* FIXME: handle per-counter overflow */
325 	total = intel_uncore_read(uncore, DMIEC);
326 	total += intel_uncore_read(uncore, DDREC);
327 	total += intel_uncore_read(uncore, CSIEC);
328 
329 	delta = total - ips->last_count1;
330 
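	/*
	 * Linear power model: chipset power ~= (m * (delta / dt) + c) / 10,
	 * with m and c taken from cparams[] in gen5_rps_init().
	 */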
331 	result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
332 
333 	ips->last_count1 = total;
334 	ips->last_time1 = now;
335 
336 	ips->chipset_power = result;
337 
338 	return result;
339 }
340 
341 static unsigned long ips_mch_val(struct intel_uncore *uncore)
342 {
343 	unsigned int m, x, b;
344 	u32 tsfs;
345 
346 	tsfs = intel_uncore_read(uncore, TSFS);
347 	x = intel_uncore_read8(uncore, TR1);
348 
349 	b = tsfs & TSFS_INTR_MASK;
350 	m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
351 
352 	return m * x / 127 - b;
353 }
354 
355 static int _pxvid_to_vd(u8 pxvid)
356 {
357 	if (pxvid == 0)
358 		return 0;
359 
360 	if (pxvid >= 8 && pxvid < 31)
361 		pxvid = 31;
362 
363 	return (pxvid + 2) * 125;
364 }
365 
366 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
367 {
368 	const int vd = _pxvid_to_vd(pxvid);
369 
370 	if (INTEL_INFO(i915)->is_mobile)
371 		return max(vd - 1125, 0);
372 
373 	return vd;
374 }
375 
376 static void __gen5_ips_update(struct intel_ips *ips)
377 {
378 	struct intel_uncore *uncore =
379 		rps_to_uncore(container_of(ips, struct intel_rps, ips));
380 	u64 now, delta, dt;
381 	u32 count;
382 
383 	lockdep_assert_held(&mchdev_lock);
384 
385 	now = ktime_get_raw_ns();
386 	dt = now - ips->last_time2;
387 	do_div(dt, NSEC_PER_MSEC);
388 
389 	/* Don't divide by 0 */
390 	if (dt <= 10)
391 		return;
392 
393 	count = intel_uncore_read(uncore, GFXEC);
394 	delta = count - ips->last_count2;
395 
396 	ips->last_count2 = count;
397 	ips->last_time2 = now;
398 
399 	/* More magic constants... */
400 	ips->gfx_power = div_u64(delta * 1181, dt * 10);
401 }
402 
403 static void gen5_rps_update(struct intel_rps *rps)
404 {
405 	spin_lock_irq(&mchdev_lock);
406 	__gen5_ips_update(&rps->ips);
407 	spin_unlock_irq(&mchdev_lock);
408 }
409 
410 static unsigned int gen5_invert_freq(struct intel_rps *rps,
411 				     unsigned int val)
412 {
413 	/* Invert the frequency bin into an ips delay */
414 	val = rps->max_freq - val;
415 	val = rps->min_freq + val;
416 
417 	return val;
418 }
419 
420 static int __gen5_rps_set(struct intel_rps *rps, u8 val)
421 {
422 	struct intel_uncore *uncore = rps_to_uncore(rps);
423 	u16 rgvswctl;
424 
425 	lockdep_assert_held(&mchdev_lock);
426 
427 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
428 	if (rgvswctl & MEMCTL_CMD_STS) {
429 		DRM_DEBUG("gpu busy, RCS change rejected\n");
430 		return -EBUSY; /* still busy with another command */
431 	}
432 
433 	/* Invert the frequency bin into an ips delay */
434 	val = gen5_invert_freq(rps, val);
435 
436 	rgvswctl =
437 		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
438 		(val << MEMCTL_FREQ_SHIFT) |
439 		MEMCTL_SFCAVM;
440 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
441 	intel_uncore_posting_read16(uncore, MEMSWCTL);
442 
443 	rgvswctl |= MEMCTL_CMD_STS;
444 	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
445 
446 	return 0;
447 }
448 
449 static int gen5_rps_set(struct intel_rps *rps, u8 val)
450 {
451 	int err;
452 
453 	spin_lock_irq(&mchdev_lock);
454 	err = __gen5_rps_set(rps, val);
455 	spin_unlock_irq(&mchdev_lock);
456 
457 	return err;
458 }
459 
460 static unsigned long intel_pxfreq(u32 vidfreq)
461 {
462 	int div = (vidfreq & 0x3f0000) >> 16;
463 	int post = (vidfreq & 0x3000) >> 12;
464 	int pre = (vidfreq & 0x7);
465 
466 	if (!pre)
467 		return 0;
468 
469 	return div * 133333 / (pre << post);
470 }
471 
472 static unsigned int init_emon(struct intel_uncore *uncore)
473 {
474 	u8 pxw[16];
475 	int i;
476 
477 	/* Disable to program */
478 	intel_uncore_write(uncore, ECR, 0);
479 	intel_uncore_posting_read(uncore, ECR);
480 
481 	/* Program energy weights for various events */
482 	intel_uncore_write(uncore, SDEW, 0x15040d00);
483 	intel_uncore_write(uncore, CSIEW0, 0x007f0000);
484 	intel_uncore_write(uncore, CSIEW1, 0x1e220004);
485 	intel_uncore_write(uncore, CSIEW2, 0x04000004);
486 
487 	for (i = 0; i < 5; i++)
488 		intel_uncore_write(uncore, PEW(i), 0);
489 	for (i = 0; i < 3; i++)
490 		intel_uncore_write(uncore, DEW(i), 0);
491 
492 	/* Program P-state weights to account for frequency power adjustment */
493 	for (i = 0; i < 16; i++) {
494 		u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
495 		unsigned int freq = intel_pxfreq(pxvidfreq);
496 		unsigned int vid =
497 			(pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
498 		unsigned int val;
499 
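		/*
		 * Weight roughly tracks V^2 * f (dynamic power), normalised
		 * into a u8 against the 127/127/900 divisor below (an
		 * interpretation of the constants, which are undocumented).
		 */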
500 		val = vid * vid * freq / 1000 * 255;
501 		val /= 127 * 127 * 900;
502 
503 		pxw[i] = val;
504 	}
505 	/* Render standby states get 0 weight */
506 	pxw[14] = 0;
507 	pxw[15] = 0;
508 
509 	for (i = 0; i < 4; i++) {
510 		intel_uncore_write(uncore, PXW(i),
511 				   pxw[i * 4 + 0] << 24 |
512 				   pxw[i * 4 + 1] << 16 |
513 				   pxw[i * 4 + 2] <<  8 |
514 				   pxw[i * 4 + 3] <<  0);
515 	}
516 
517 	/* Adjust magic regs to magic values (more experimental results) */
518 	intel_uncore_write(uncore, OGW0, 0);
519 	intel_uncore_write(uncore, OGW1, 0);
520 	intel_uncore_write(uncore, EG0, 0x00007f00);
521 	intel_uncore_write(uncore, EG1, 0x0000000e);
522 	intel_uncore_write(uncore, EG2, 0x000e0000);
523 	intel_uncore_write(uncore, EG3, 0x68000300);
524 	intel_uncore_write(uncore, EG4, 0x42000000);
525 	intel_uncore_write(uncore, EG5, 0x00140031);
526 	intel_uncore_write(uncore, EG6, 0);
527 	intel_uncore_write(uncore, EG7, 0);
528 
529 	for (i = 0; i < 8; i++)
530 		intel_uncore_write(uncore, PXWL(i), 0);
531 
532 	/* Enable PMON + select events */
533 	intel_uncore_write(uncore, ECR, 0x80000019);
534 
535 	return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
536 }
537 
538 static bool gen5_rps_enable(struct intel_rps *rps)
539 {
540 	struct drm_i915_private *i915 = rps_to_i915(rps);
541 	struct intel_uncore *uncore = rps_to_uncore(rps);
542 	u8 fstart, vstart;
543 	u32 rgvmodectl;
544 
545 	spin_lock_irq(&mchdev_lock);
546 
547 	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
548 
549 	/* Enable temp reporting */
550 	intel_uncore_write16(uncore, PMMISC,
551 			     intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
552 	intel_uncore_write16(uncore, TSC1,
553 			     intel_uncore_read16(uncore, TSC1) | TSE);
554 
555 	/* 100ms RC evaluation intervals */
556 	intel_uncore_write(uncore, RCUPEI, 100000);
557 	intel_uncore_write(uncore, RCDNEI, 100000);
558 
559 	/* Set max/min thresholds to 90ms and 80ms respectively */
560 	intel_uncore_write(uncore, RCBMAXAVG, 90000);
561 	intel_uncore_write(uncore, RCBMINAVG, 80000);
562 
563 	intel_uncore_write(uncore, MEMIHYST, 1);
564 
565 	/* Set up min, max, and cur for interrupt handling */
566 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
567 		MEMMODE_FSTART_SHIFT;
568 
569 	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
570 		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
571 
572 	intel_uncore_write(uncore,
573 			   MEMINTREN,
574 			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
575 
576 	intel_uncore_write(uncore, VIDSTART, vstart);
577 	intel_uncore_posting_read(uncore, VIDSTART);
578 
579 	rgvmodectl |= MEMMODE_SWMODE_EN;
580 	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
581 
582 	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
583 			     MEMCTL_CMD_STS) == 0, 10))
584 		drm_err(&uncore->i915->drm,
585 			"stuck trying to change perf mode\n");
586 	mdelay(1);
587 
588 	__gen5_rps_set(rps, rps->cur_freq);
589 
590 	rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
591 	rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
592 	rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
593 	rps->ips.last_time1 = jiffies_to_msecs(jiffies);
594 
595 	rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
596 	rps->ips.last_time2 = ktime_get_raw_ns();
597 
598 	spin_lock(&i915->irq_lock);
599 	ilk_enable_display_irq(i915, DE_PCU_EVENT);
600 	spin_unlock(&i915->irq_lock);
601 
602 	spin_unlock_irq(&mchdev_lock);
603 
604 	rps->ips.corr = init_emon(uncore);
605 
606 	return true;
607 }
608 
609 static void gen5_rps_disable(struct intel_rps *rps)
610 {
611 	struct drm_i915_private *i915 = rps_to_i915(rps);
612 	struct intel_uncore *uncore = rps_to_uncore(rps);
613 	u16 rgvswctl;
614 
615 	spin_lock_irq(&mchdev_lock);
616 
617 	spin_lock(&i915->irq_lock);
618 	ilk_disable_display_irq(i915, DE_PCU_EVENT);
619 	spin_unlock(&i915->irq_lock);
620 
621 	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
622 
623 	/* Ack interrupts, disable EFC interrupt */
624 	intel_uncore_write(uncore, MEMINTREN,
625 			   intel_uncore_read(uncore, MEMINTREN) &
626 			   ~MEMINT_EVAL_CHG_EN);
627 	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
628 
629 	/* Go back to the starting frequency */
630 	__gen5_rps_set(rps, rps->idle_freq);
631 	mdelay(1);
632 	rgvswctl |= MEMCTL_CMD_STS;
633 	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
634 	mdelay(1);
635 
636 	spin_unlock_irq(&mchdev_lock);
637 }
638 
639 static u32 rps_limits(struct intel_rps *rps, u8 val)
640 {
641 	u32 limits;
642 
643 	/*
644 	 * Only set the down limit when we've reached the lowest level to avoid
645 	 * getting more interrupts, otherwise leave this clear. This prevents a
646 	 * race in the hw when coming out of rc6: There's a tiny window where
647 	 * the hw runs at the minimal clock before selecting the desired
648 	 * frequency, if the down threshold expires in that window we will not
649 	 * receive a down interrupt.
650 	 */
651 	if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
652 		limits = rps->max_freq_softlimit << 23;
653 		if (val <= rps->min_freq_softlimit)
654 			limits |= rps->min_freq_softlimit << 14;
655 	} else {
656 		limits = rps->max_freq_softlimit << 24;
657 		if (val <= rps->min_freq_softlimit)
658 			limits |= rps->min_freq_softlimit << 16;
659 	}
660 
661 	return limits;
662 }
663 
664 static void rps_set_power(struct intel_rps *rps, int new_power)
665 {
666 	struct intel_gt *gt = rps_to_gt(rps);
667 	struct intel_uncore *uncore = gt->uncore;
668 	u32 threshold_up = 0, threshold_down = 0; /* in % */
669 	u32 ei_up = 0, ei_down = 0;
670 
671 	lockdep_assert_held(&rps->power.mutex);
672 
673 	if (new_power == rps->power.mode)
674 		return;
675 
676 	threshold_up = 95;
677 	threshold_down = 85;
678 
679 	/* Note the units here are not exactly 1us, but 1280ns. */
680 	switch (new_power) {
681 	case LOW_POWER:
682 		ei_up = 16000;
683 		ei_down = 32000;
684 		break;
685 
686 	case BETWEEN:
687 		ei_up = 13000;
688 		ei_down = 32000;
689 		break;
690 
691 	case HIGH_POWER:
692 		ei_up = 10000;
693 		ei_down = 32000;
694 		break;
695 	}
696 
697 	/* When byt can survive dynamic sw freq adjustments without
698 	 * hanging the system, this restriction can be lifted.
699 	 */
700 	if (IS_VALLEYVIEW(gt->i915))
701 		goto skip_hw_write;
702 
703 	GT_TRACE(gt,
704 		 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n",
705 		 new_power, threshold_up, ei_up, threshold_down, ei_down);
706 
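	/*
	 * ei_* is in us and threshold_* in %, so the threshold interval in
	 * ns is ei * 1000 * threshold / 100 == ei * threshold * 10.
	 */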
707 	set(uncore, GEN6_RP_UP_EI,
708 	    intel_gt_ns_to_pm_interval(gt, ei_up * 1000));
709 	set(uncore, GEN6_RP_UP_THRESHOLD,
710 	    intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10));
711 
712 	set(uncore, GEN6_RP_DOWN_EI,
713 	    intel_gt_ns_to_pm_interval(gt, ei_down * 1000));
714 	set(uncore, GEN6_RP_DOWN_THRESHOLD,
715 	    intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10));
716 
717 	set(uncore, GEN6_RP_CONTROL,
718 	    (GRAPHICS_VER(gt->i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
719 	    GEN6_RP_MEDIA_HW_NORMAL_MODE |
720 	    GEN6_RP_MEDIA_IS_GFX |
721 	    GEN6_RP_ENABLE |
722 	    GEN6_RP_UP_BUSY_AVG |
723 	    GEN6_RP_DOWN_IDLE_AVG);
724 
725 skip_hw_write:
726 	rps->power.mode = new_power;
727 	rps->power.up_threshold = threshold_up;
728 	rps->power.down_threshold = threshold_down;
729 }
730 
731 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
732 {
733 	int new_power;
734 
735 	new_power = rps->power.mode;
736 	switch (rps->power.mode) {
737 	case LOW_POWER:
738 		if (val > rps->efficient_freq + 1 &&
739 		    val > rps->cur_freq)
740 			new_power = BETWEEN;
741 		break;
742 
743 	case BETWEEN:
744 		if (val <= rps->efficient_freq &&
745 		    val < rps->cur_freq)
746 			new_power = LOW_POWER;
747 		else if (val >= rps->rp0_freq &&
748 			 val > rps->cur_freq)
749 			new_power = HIGH_POWER;
750 		break;
751 
752 	case HIGH_POWER:
753 		if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
754 		    val < rps->cur_freq)
755 			new_power = BETWEEN;
756 		break;
757 	}
758 	/* Max/min bins are special */
759 	if (val <= rps->min_freq_softlimit)
760 		new_power = LOW_POWER;
761 	if (val >= rps->max_freq_softlimit)
762 		new_power = HIGH_POWER;
763 
764 	mutex_lock(&rps->power.mutex);
765 	if (rps->power.interactive)
766 		new_power = HIGH_POWER;
767 	rps_set_power(rps, new_power);
768 	mutex_unlock(&rps->power.mutex);
769 }
770 
771 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
772 {
773 	GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive));
774 
775 	mutex_lock(&rps->power.mutex);
776 	if (interactive) {
777 		if (!rps->power.interactive++ && intel_rps_is_active(rps))
778 			rps_set_power(rps, HIGH_POWER);
779 	} else {
780 		GEM_BUG_ON(!rps->power.interactive);
781 		rps->power.interactive--;
782 	}
783 	mutex_unlock(&rps->power.mutex);
784 }
785 
786 static int gen6_rps_set(struct intel_rps *rps, u8 val)
787 {
788 	struct intel_uncore *uncore = rps_to_uncore(rps);
789 	struct drm_i915_private *i915 = rps_to_i915(rps);
790 	u32 swreq;
791 
792 	GEM_BUG_ON(rps_uses_slpc(rps));
793 
794 	if (GRAPHICS_VER(i915) >= 9)
795 		swreq = GEN9_FREQUENCY(val);
796 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
797 		swreq = HSW_FREQUENCY(val);
798 	else
799 		swreq = (GEN6_FREQUENCY(val) |
800 			 GEN6_OFFSET(0) |
801 			 GEN6_AGGRESSIVE_TURBO);
802 	set(uncore, GEN6_RPNSWREQ, swreq);
803 
804 	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n",
805 		 val, intel_gpu_freq(rps, val), swreq);
806 
807 	return 0;
808 }
809 
810 static int vlv_rps_set(struct intel_rps *rps, u8 val)
811 {
812 	struct drm_i915_private *i915 = rps_to_i915(rps);
813 	int err;
814 
815 	vlv_punit_get(i915);
816 	err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
817 	vlv_punit_put(i915);
818 
819 	GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n",
820 		 val, intel_gpu_freq(rps, val));
821 
822 	return err;
823 }
824 
825 static int rps_set(struct intel_rps *rps, u8 val, bool update)
826 {
827 	struct drm_i915_private *i915 = rps_to_i915(rps);
828 	int err;
829 
830 	if (val == rps->last_freq)
831 		return 0;
832 
833 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
834 		err = vlv_rps_set(rps, val);
835 	else if (GRAPHICS_VER(i915) >= 6)
836 		err = gen6_rps_set(rps, val);
837 	else
838 		err = gen5_rps_set(rps, val);
839 	if (err)
840 		return err;
841 
842 	if (update && GRAPHICS_VER(i915) >= 6)
843 		gen6_rps_set_thresholds(rps, val);
844 	rps->last_freq = val;
845 
846 	return 0;
847 }
848 
849 void intel_rps_unpark(struct intel_rps *rps)
850 {
851 	if (!intel_rps_is_enabled(rps))
852 		return;
853 
854 	GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq);
855 
856 	/*
857 	 * Use the user's desired frequency as a guide, but for better
858 	 * performance, jump directly to RPe as our starting frequency.
859 	 */
860 	mutex_lock(&rps->lock);
861 
862 	intel_rps_set_active(rps);
863 	intel_rps_set(rps,
864 		      clamp(rps->cur_freq,
865 			    rps->min_freq_softlimit,
866 			    rps->max_freq_softlimit));
867 
868 	mutex_unlock(&rps->lock);
869 
870 	rps->pm_iir = 0;
871 	if (intel_rps_has_interrupts(rps))
872 		rps_enable_interrupts(rps);
873 	if (intel_rps_uses_timer(rps))
874 		rps_start_timer(rps);
875 
876 	if (GRAPHICS_VER(rps_to_i915(rps)) == 5)
877 		gen5_rps_update(rps);
878 }
879 
880 void intel_rps_park(struct intel_rps *rps)
881 {
882 	int adj;
883 
884 	if (!intel_rps_is_enabled(rps))
885 		return;
886 
887 	if (!intel_rps_clear_active(rps))
888 		return;
889 
890 	if (intel_rps_uses_timer(rps))
891 		rps_stop_timer(rps);
892 	if (intel_rps_has_interrupts(rps))
893 		rps_disable_interrupts(rps);
894 
895 	if (rps->last_freq <= rps->idle_freq)
896 		return;
897 
898 	/*
899 	 * The punit delays the write of the frequency and voltage until it
900 	 * determines the GPU is awake. During normal usage we don't want to
901 	 * waste power changing the frequency if the GPU is sleeping (rc6).
902 	 * However, the GPU and driver are now idle and we do not want to delay
903 	 * switching to minimum voltage (reducing power whilst idle) as we do
904 	 * not expect to be woken in the near future and so must flush the
905 	 * change by waking the device.
906 	 *
907 	 * We choose to take the media powerwell (either would do to trick the
908 	 * punit into committing the voltage change) as that takes a lot less
909 	 * power than the render powerwell.
910 	 */
911 	intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
912 	rps_set(rps, rps->idle_freq, false);
913 	intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
914 
915 	/*
916 	 * Since we will try and restart from the previously requested
917 	 * frequency on unparking, treat this idle point as a downclock
918 	 * interrupt and reduce the frequency for resume. If we park/unpark
919 	 * more frequently than the rps worker can run, we will not respond
920 	 * to any EI and never see a change in frequency.
921 	 *
922 	 * (Note we accommodate Cherryview's limitation of only using an
923 	 * even bin by applying it to all.)
924 	 */
925 	adj = rps->last_adj;
926 	if (adj < 0)
927 		adj *= 2;
928 	else /* CHV needs even encode values */
929 		adj = -2;
930 	rps->last_adj = adj;
931 	rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq);
932 	if (rps->cur_freq < rps->efficient_freq) {
933 		rps->cur_freq = rps->efficient_freq;
934 		rps->last_adj = 0;
935 	}
936 
937 	GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
938 }
939 
940 void intel_rps_boost(struct i915_request *rq)
941 {
942 	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
943 		return;
944 
945 	/* Serializes with i915_request_retire() */
946 	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
947 		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
948 
949 		if (atomic_fetch_inc(&rps->num_waiters))
950 			return;
951 
952 		if (!intel_rps_is_active(rps))
953 			return;
954 
955 		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
956 			 rq->fence.context, rq->fence.seqno);
957 
958 		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
959 			schedule_work(&rps->work);
960 
961 		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
962 	}
963 }
964 
965 int intel_rps_set(struct intel_rps *rps, u8 val)
966 {
967 	int err;
968 
969 	lockdep_assert_held(&rps->lock);
970 	GEM_BUG_ON(val > rps->max_freq);
971 	GEM_BUG_ON(val < rps->min_freq);
972 
973 	if (intel_rps_is_active(rps)) {
974 		err = rps_set(rps, val, true);
975 		if (err)
976 			return err;
977 
978 		/*
979 		 * Make sure we continue to get interrupts
980 		 * until we hit the minimum or maximum frequencies.
981 		 */
982 		if (intel_rps_has_interrupts(rps)) {
983 			struct intel_uncore *uncore = rps_to_uncore(rps);
984 
985 			set(uncore,
986 			    GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
987 
988 			set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
989 		}
990 	}
991 
992 	rps->cur_freq = val;
993 	return 0;
994 }
995 
996 static void gen6_rps_init(struct intel_rps *rps)
997 {
998 	struct drm_i915_private *i915 = rps_to_i915(rps);
999 	struct intel_uncore *uncore = rps_to_uncore(rps);
1000 
1001 	/* All of these values are in units of 50 MHz */
1002 
1003 	/* static values from HW: RP0 > RP1 > RPn (min_freq) */
1004 	if (IS_GEN9_LP(i915)) {
1005 		u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
1006 
1007 		rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
1008 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1009 		rps->min_freq = (rp_state_cap >>  0) & 0xff;
1010 	} else {
1011 		u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
1012 
1013 		rps->rp0_freq = (rp_state_cap >>  0) & 0xff;
1014 		rps->rp1_freq = (rp_state_cap >>  8) & 0xff;
1015 		rps->min_freq = (rp_state_cap >> 16) & 0xff;
1016 	}
1017 
1018 	/* hw_max = RP0 until we check for overclocking */
1019 	rps->max_freq = rps->rp0_freq;
1020 
1021 	rps->efficient_freq = rps->rp1_freq;
1022 	if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
1023 	    IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1024 		u32 ddcc_status = 0;
1025 
1026 		if (sandybridge_pcode_read(i915,
1027 					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
1028 					   &ddcc_status, NULL) == 0)
1029 			rps->efficient_freq =
1030 				clamp_t(u8,
1031 					(ddcc_status >> 8) & 0xff,
1032 					rps->min_freq,
1033 					rps->max_freq);
1034 	}
1035 
1036 	if (IS_GEN9_BC(i915) || GRAPHICS_VER(i915) >= 11) {
1037 		/* Store the frequency values in 16.66 MHz units, which is
1038 		 * the natural hardware unit for SKL.
1039 		 */
1040 		rps->rp0_freq *= GEN9_FREQ_SCALER;
1041 		rps->rp1_freq *= GEN9_FREQ_SCALER;
1042 		rps->min_freq *= GEN9_FREQ_SCALER;
1043 		rps->max_freq *= GEN9_FREQ_SCALER;
1044 		rps->efficient_freq *= GEN9_FREQ_SCALER;
1045 	}
1046 }
1047 
1048 static bool rps_reset(struct intel_rps *rps)
1049 {
1050 	struct drm_i915_private *i915 = rps_to_i915(rps);
1051 
1052 	/* force a reset */
1053 	rps->power.mode = -1;
1054 	rps->last_freq = -1;
1055 
1056 	if (rps_set(rps, rps->min_freq, true)) {
1057 		drm_err(&i915->drm, "Failed to reset RPS to initial values\n");
1058 		return false;
1059 	}
1060 
1061 	rps->cur_freq = rps->min_freq;
1062 	return true;
1063 }
1064 
1065 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
1066 static bool gen9_rps_enable(struct intel_rps *rps)
1067 {
1068 	struct intel_gt *gt = rps_to_gt(rps);
1069 	struct intel_uncore *uncore = gt->uncore;
1070 
1071 	/* Program defaults and thresholds for RPS */
1072 	if (GRAPHICS_VER(gt->i915) == 9)
1073 		intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1074 				      GEN9_FREQUENCY(rps->rp1_freq));
1075 
1076 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
1077 
1078 	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1079 
1080 	return rps_reset(rps);
1081 }
1082 
1083 static bool gen8_rps_enable(struct intel_rps *rps)
1084 {
1085 	struct intel_uncore *uncore = rps_to_uncore(rps);
1086 
1087 	intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
1088 			      HSW_FREQUENCY(rps->rp1_freq));
1089 
1090 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1091 
1092 	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
1093 
1094 	return rps_reset(rps);
1095 }
1096 
1097 static bool gen6_rps_enable(struct intel_rps *rps)
1098 {
1099 	struct intel_uncore *uncore = rps_to_uncore(rps);
1100 
1101 	/* Power down if completely idle for over 50ms */
1102 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
1103 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1104 
1105 	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1106 			  GEN6_PM_RP_DOWN_THRESHOLD |
1107 			  GEN6_PM_RP_DOWN_TIMEOUT);
1108 
1109 	return rps_reset(rps);
1110 }
1111 
1112 static int chv_rps_max_freq(struct intel_rps *rps)
1113 {
1114 	struct drm_i915_private *i915 = rps_to_i915(rps);
1115 	struct intel_gt *gt = rps_to_gt(rps);
1116 	u32 val;
1117 
1118 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1119 
1120 	switch (gt->info.sseu.eu_total) {
1121 	case 8:
1122 		/* (2 * 4) config */
1123 		val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
1124 		break;
1125 	case 12:
1126 		/* (2 * 6) config */
1127 		val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
1128 		break;
1129 	case 16:
1130 		/* (2 * 8) config */
1131 	default:
1132 		/* Use the (2 * 8) min RP0 for any other combination */
1133 		val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
1134 		break;
1135 	}
1136 
1137 	return val & FB_GFX_FREQ_FUSE_MASK;
1138 }
1139 
1140 static int chv_rps_rpe_freq(struct intel_rps *rps)
1141 {
1142 	struct drm_i915_private *i915 = rps_to_i915(rps);
1143 	u32 val;
1144 
1145 	val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
1146 	val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
1147 
1148 	return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
1149 }
1150 
1151 static int chv_rps_guar_freq(struct intel_rps *rps)
1152 {
1153 	struct drm_i915_private *i915 = rps_to_i915(rps);
1154 	u32 val;
1155 
1156 	val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
1157 
1158 	return val & FB_GFX_FREQ_FUSE_MASK;
1159 }
1160 
1161 static u32 chv_rps_min_freq(struct intel_rps *rps)
1162 {
1163 	struct drm_i915_private *i915 = rps_to_i915(rps);
1164 	u32 val;
1165 
1166 	val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
1167 	val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
1168 
1169 	return val & FB_GFX_FREQ_FUSE_MASK;
1170 }
1171 
1172 static bool chv_rps_enable(struct intel_rps *rps)
1173 {
1174 	struct intel_uncore *uncore = rps_to_uncore(rps);
1175 	struct drm_i915_private *i915 = rps_to_i915(rps);
1176 	u32 val;
1177 
1178 	/* 1: Program defaults and thresholds for RPS */
1179 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1180 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1181 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1182 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1183 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1184 
1185 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1186 
1187 	/* 2: Enable RPS */
1188 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1189 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1190 			      GEN6_RP_MEDIA_IS_GFX |
1191 			      GEN6_RP_ENABLE |
1192 			      GEN6_RP_UP_BUSY_AVG |
1193 			      GEN6_RP_DOWN_IDLE_AVG);
1194 
1195 	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
1196 			  GEN6_PM_RP_DOWN_THRESHOLD |
1197 			  GEN6_PM_RP_DOWN_TIMEOUT);
1198 
1199 	/* Setting Fixed Bias */
1200 	vlv_punit_get(i915);
1201 
1202 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
1203 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1204 
1205 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1206 
1207 	vlv_punit_put(i915);
1208 
1209 	/* RPS code assumes GPLL is used */
1210 	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1211 		      "GPLL not enabled\n");
1212 
1213 	drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1214 	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1215 
1216 	return rps_reset(rps);
1217 }
1218 
1219 static int vlv_rps_guar_freq(struct intel_rps *rps)
1220 {
1221 	struct drm_i915_private *i915 = rps_to_i915(rps);
1222 	u32 val, rp1;
1223 
1224 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1225 
1226 	rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
1227 	rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
1228 
1229 	return rp1;
1230 }
1231 
1232 static int vlv_rps_max_freq(struct intel_rps *rps)
1233 {
1234 	struct drm_i915_private *i915 = rps_to_i915(rps);
1235 	u32 val, rp0;
1236 
1237 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
1238 
1239 	rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
1240 	/* Clamp to max */
1241 	rp0 = min_t(u32, rp0, 0xea);
1242 
1243 	return rp0;
1244 }
1245 
1246 static int vlv_rps_rpe_freq(struct intel_rps *rps)
1247 {
1248 	struct drm_i915_private *i915 = rps_to_i915(rps);
1249 	u32 val, rpe;
1250 
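	/*
	 * The RPe fuse is split across two registers: the low 5 bits live in
	 * FB_GFX_FMAX_FUSE_LO and the remaining bits in FB_GFX_FMAX_FUSE_HI,
	 * recombined below.
	 */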
1251 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
1252 	rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
1253 	val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
1254 	rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
1255 
1256 	return rpe;
1257 }
1258 
1259 static int vlv_rps_min_freq(struct intel_rps *rps)
1260 {
1261 	struct drm_i915_private *i915 = rps_to_i915(rps);
1262 	u32 val;
1263 
1264 	val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
1265 	/*
1266 	 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
1267 	 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
1268 	 * a BYT-M B0 the above register contains 0xbf. Moreover, when setting
1269 	 * a frequency, Punit will not allow values below 0xc0. Clamp it to
1270 	 * 0xc0 to make sure it matches what Punit accepts.
1271 	 */
1272 	return max_t(u32, val, 0xc0);
1273 }
1274 
1275 static bool vlv_rps_enable(struct intel_rps *rps)
1276 {
1277 	struct intel_uncore *uncore = rps_to_uncore(rps);
1278 	struct drm_i915_private *i915 = rps_to_i915(rps);
1279 	u32 val;
1280 
1281 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
1282 	intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
1283 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
1284 	intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
1285 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
1286 
1287 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
1288 
1289 	intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
1290 			      GEN6_RP_MEDIA_TURBO |
1291 			      GEN6_RP_MEDIA_HW_NORMAL_MODE |
1292 			      GEN6_RP_MEDIA_IS_GFX |
1293 			      GEN6_RP_ENABLE |
1294 			      GEN6_RP_UP_BUSY_AVG |
1295 			      GEN6_RP_DOWN_IDLE_CONT);
1296 
1297 	/* WaGsvRC0ResidencyMethod:vlv */
1298 	rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
1299 
1300 	vlv_punit_get(i915);
1301 
1302 	/* Setting Fixed Bias */
1303 	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
1304 	vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
1305 
1306 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1307 
1308 	vlv_punit_put(i915);
1309 
1310 	/* RPS code assumes GPLL is used */
1311 	drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0,
1312 		      "GPLL not enabled\n");
1313 
1314 	drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE));
1315 	drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val);
1316 
1317 	return rps_reset(rps);
1318 }
1319 
1320 static unsigned long __ips_gfx_val(struct intel_ips *ips)
1321 {
1322 	struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
1323 	struct intel_uncore *uncore = rps_to_uncore(rps);
1324 	unsigned int t, state1, state2;
1325 	u32 pxvid, ext_v;
1326 	u64 corr, corr2;
1327 
1328 	lockdep_assert_held(&mchdev_lock);
1329 
1330 	pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
1331 	pxvid = (pxvid >> 24) & 0x7f;
1332 	ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
1333 
1334 	state1 = ext_v;
1335 
1336 	/* Revel in the empirically derived constants */
1337 
1338 	/* Correction factor in 1/100000 units */
1339 	t = ips_mch_val(uncore);
1340 	if (t > 80)
1341 		corr = t * 2349 + 135940;
1342 	else if (t >= 50)
1343 		corr = t * 964 + 29317;
1344 	else /* < 50 */
1345 		corr = t * 301 + 1004;
1346 
1347 	corr = div_u64(corr * 150142 * state1, 10000) - 78642;
1348 	corr2 = div_u64(corr, 100000) * ips->corr;
1349 
1350 	state2 = div_u64(corr2 * state1, 10000);
1351 	state2 /= 100; /* convert to mW */
1352 
1353 	__gen5_ips_update(ips);
1354 
1355 	return ips->gfx_power + state2;
1356 }
1357 
1358 static bool has_busy_stats(struct intel_rps *rps)
1359 {
1360 	struct intel_engine_cs *engine;
1361 	enum intel_engine_id id;
1362 
1363 	for_each_engine(engine, rps_to_gt(rps), id) {
1364 		if (!intel_engine_supports_stats(engine))
1365 			return false;
1366 	}
1367 
1368 	return true;
1369 }
1370 
1371 void intel_rps_enable(struct intel_rps *rps)
1372 {
1373 	struct drm_i915_private *i915 = rps_to_i915(rps);
1374 	struct intel_uncore *uncore = rps_to_uncore(rps);
1375 	bool enabled = false;
1376 
1377 	if (!HAS_RPS(i915))
1378 		return;
1379 
1380 	if (rps_uses_slpc(rps))
1381 		return;
1382 
1383 	intel_gt_check_clock_frequency(rps_to_gt(rps));
1384 
1385 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1386 	if (rps->max_freq <= rps->min_freq)
1387 		/* leave disabled, no room for dynamic reclocking */;
1388 	else if (IS_CHERRYVIEW(i915))
1389 		enabled = chv_rps_enable(rps);
1390 	else if (IS_VALLEYVIEW(i915))
1391 		enabled = vlv_rps_enable(rps);
1392 	else if (GRAPHICS_VER(i915) >= 9)
1393 		enabled = gen9_rps_enable(rps);
1394 	else if (GRAPHICS_VER(i915) >= 8)
1395 		enabled = gen8_rps_enable(rps);
1396 	else if (GRAPHICS_VER(i915) >= 6)
1397 		enabled = gen6_rps_enable(rps);
1398 	else if (IS_IRONLAKE_M(i915))
1399 		enabled = gen5_rps_enable(rps);
1400 	else
1401 		MISSING_CASE(GRAPHICS_VER(i915));
1402 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
1403 	if (!enabled)
1404 		return;
1405 
1406 	GT_TRACE(rps_to_gt(rps),
1407 		 "min:%x, max:%x, freq:[%d, %d]\n",
1408 		 rps->min_freq, rps->max_freq,
1409 		 intel_gpu_freq(rps, rps->min_freq),
1410 		 intel_gpu_freq(rps, rps->max_freq));
1411 
1412 	GEM_BUG_ON(rps->max_freq < rps->min_freq);
1413 	GEM_BUG_ON(rps->idle_freq > rps->max_freq);
1414 
1415 	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
1416 	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
1417 
1418 	if (has_busy_stats(rps))
1419 		intel_rps_set_timer(rps);
1420 	else if (GRAPHICS_VER(i915) >= 6)
1421 		intel_rps_set_interrupts(rps);
1422 	else
1423 		/* Ironlake currently uses intel_ips.ko */ {}
1424 
1425 	intel_rps_set_enabled(rps);
1426 }
1427 
1428 static void gen6_rps_disable(struct intel_rps *rps)
1429 {
1430 	set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
1431 }
1432 
1433 void intel_rps_disable(struct intel_rps *rps)
1434 {
1435 	struct drm_i915_private *i915 = rps_to_i915(rps);
1436 
1437 	intel_rps_clear_enabled(rps);
1438 	intel_rps_clear_interrupts(rps);
1439 	intel_rps_clear_timer(rps);
1440 
1441 	if (GRAPHICS_VER(i915) >= 6)
1442 		gen6_rps_disable(rps);
1443 	else if (IS_IRONLAKE_M(i915))
1444 		gen5_rps_disable(rps);
1445 }
1446 
1447 static int byt_gpu_freq(struct intel_rps *rps, int val)
1448 {
1449 	/*
1450 	 * N = val - 0xb7
1451 	 * Slow = Fast = GPLL ref * N
1452 	 */
1453 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
1454 }
1455 
1456 static int byt_freq_opcode(struct intel_rps *rps, int val)
1457 {
1458 	return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
1459 }
1460 
1461 static int chv_gpu_freq(struct intel_rps *rps, int val)
1462 {
1463 	/*
1464 	 * N = val / 2
1465 	 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
1466 	 */
1467 	return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
1468 }
1469 
1470 static int chv_freq_opcode(struct intel_rps *rps, int val)
1471 {
1472 	/* CHV needs even values */
1473 	return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
1474 }
1475 
1476 int intel_gpu_freq(struct intel_rps *rps, int val)
1477 {
1478 	struct drm_i915_private *i915 = rps_to_i915(rps);
1479 
1480 	if (GRAPHICS_VER(i915) >= 9)
1481 		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
1482 					 GEN9_FREQ_SCALER);
1483 	else if (IS_CHERRYVIEW(i915))
1484 		return chv_gpu_freq(rps, val);
1485 	else if (IS_VALLEYVIEW(i915))
1486 		return byt_gpu_freq(rps, val);
1487 	else if (GRAPHICS_VER(i915) >= 6)
1488 		return val * GT_FREQUENCY_MULTIPLIER;
1489 	else
1490 		return val;
1491 }
1492 
1493 int intel_freq_opcode(struct intel_rps *rps, int val)
1494 {
1495 	struct drm_i915_private *i915 = rps_to_i915(rps);
1496 
1497 	if (GRAPHICS_VER(i915) >= 9)
1498 		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
1499 					 GT_FREQUENCY_MULTIPLIER);
1500 	else if (IS_CHERRYVIEW(i915))
1501 		return chv_freq_opcode(rps, val);
1502 	else if (IS_VALLEYVIEW(i915))
1503 		return byt_freq_opcode(rps, val);
1504 	else if (GRAPHICS_VER(i915) >= 6)
1505 		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
1506 	else
1507 		return val;
1508 }
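/*
 * Example (assuming GT_FREQUENCY_MULTIPLIER == 50): on gen6-8 an opcode of
 * 16 corresponds to intel_gpu_freq() == 800 MHz and intel_freq_opcode(rps,
 * 800) maps back to 16; gen9+ additionally scales by GEN9_FREQ_SCALER to
 * express its 16.66 MHz units.
 */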
1509 
1510 static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
1511 {
1512 	struct drm_i915_private *i915 = rps_to_i915(rps);
1513 
1514 	rps->gpll_ref_freq =
1515 		vlv_get_cck_clock(i915, "GPLL ref",
1516 				  CCK_GPLL_CLOCK_CONTROL,
1517 				  i915->czclk_freq);
1518 
1519 	drm_dbg(&i915->drm, "GPLL reference freq: %d kHz\n",
1520 		rps->gpll_ref_freq);
1521 }
1522 
1523 static void vlv_rps_init(struct intel_rps *rps)
1524 {
1525 	struct drm_i915_private *i915 = rps_to_i915(rps);
1526 	u32 val;
1527 
1528 	vlv_iosf_sb_get(i915,
1529 			BIT(VLV_IOSF_SB_PUNIT) |
1530 			BIT(VLV_IOSF_SB_NC) |
1531 			BIT(VLV_IOSF_SB_CCK));
1532 
1533 	vlv_init_gpll_ref_freq(rps);
1534 
1535 	val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1536 	switch ((val >> 6) & 3) {
1537 	case 0:
1538 	case 1:
1539 		i915->mem_freq = 800;
1540 		break;
1541 	case 2:
1542 		i915->mem_freq = 1066;
1543 		break;
1544 	case 3:
1545 		i915->mem_freq = 1333;
1546 		break;
1547 	}
1548 	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1549 
1550 	rps->max_freq = vlv_rps_max_freq(rps);
1551 	rps->rp0_freq = rps->max_freq;
1552 	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1553 		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1554 
1555 	rps->efficient_freq = vlv_rps_rpe_freq(rps);
1556 	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1557 		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1558 
1559 	rps->rp1_freq = vlv_rps_guar_freq(rps);
1560 	drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
1561 		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1562 
1563 	rps->min_freq = vlv_rps_min_freq(rps);
1564 	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1565 		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1566 
1567 	vlv_iosf_sb_put(i915,
1568 			BIT(VLV_IOSF_SB_PUNIT) |
1569 			BIT(VLV_IOSF_SB_NC) |
1570 			BIT(VLV_IOSF_SB_CCK));
1571 }
1572 
1573 static void chv_rps_init(struct intel_rps *rps)
1574 {
1575 	struct drm_i915_private *i915 = rps_to_i915(rps);
1576 	u32 val;
1577 
1578 	vlv_iosf_sb_get(i915,
1579 			BIT(VLV_IOSF_SB_PUNIT) |
1580 			BIT(VLV_IOSF_SB_NC) |
1581 			BIT(VLV_IOSF_SB_CCK));
1582 
1583 	vlv_init_gpll_ref_freq(rps);
1584 
1585 	val = vlv_cck_read(i915, CCK_FUSE_REG);
1586 
1587 	switch ((val >> 2) & 0x7) {
1588 	case 3:
1589 		i915->mem_freq = 2000;
1590 		break;
1591 	default:
1592 		i915->mem_freq = 1600;
1593 		break;
1594 	}
1595 	drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq);
1596 
1597 	rps->max_freq = chv_rps_max_freq(rps);
1598 	rps->rp0_freq = rps->max_freq;
1599 	drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n",
1600 		intel_gpu_freq(rps, rps->max_freq), rps->max_freq);
1601 
1602 	rps->efficient_freq = chv_rps_rpe_freq(rps);
1603 	drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n",
1604 		intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq);
1605 
1606 	rps->rp1_freq = chv_rps_guar_freq(rps);
1607 	drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n",
1608 		intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq);
1609 
1610 	rps->min_freq = chv_rps_min_freq(rps);
1611 	drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n",
1612 		intel_gpu_freq(rps, rps->min_freq), rps->min_freq);
1613 
1614 	vlv_iosf_sb_put(i915,
1615 			BIT(VLV_IOSF_SB_PUNIT) |
1616 			BIT(VLV_IOSF_SB_NC) |
1617 			BIT(VLV_IOSF_SB_CCK));
1618 
1619 	drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq |
1620 				   rps->rp1_freq | rps->min_freq) & 1,
1621 		      "Odd GPU freq values\n");
1622 }
1623 
1624 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
1625 {
1626 	ei->ktime = ktime_get_raw();
1627 	ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
1628 	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
1629 }
1630 
1631 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
1632 {
1633 	struct intel_uncore *uncore = rps_to_uncore(rps);
1634 	const struct intel_rps_ei *prev = &rps->ei;
1635 	struct intel_rps_ei now;
1636 	u32 events = 0;
1637 
1638 	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
1639 		return 0;
1640 
1641 	vlv_c0_read(uncore, &now);
1642 
1643 #ifdef __linux__
1644 	if (prev->ktime) {
1645 #else
1646 	if (ktime_to_ns(prev->ktime)) {
1647 #endif
1648 		u64 time, c0;
1649 		u32 render, media;
1650 
1651 		time = ktime_us_delta(now.ktime, prev->ktime);
1652 
1653 		time *= rps_to_i915(rps)->czclk_freq;
1654 
1655 		/* Workload can be split between render + media,
1656 		 * e.g. SwapBuffers being blitted in X after being rendered in
1657 		 * mesa. To account for this we need to combine both engines
1658 		 * into our activity counter.
1659 		 */
1660 		render = now.render_c0 - prev->render_c0;
1661 		media = now.media_c0 - prev->media_c0;
1662 		c0 = max(render, media);
1663 		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
1664 
1665 		if (c0 > time * rps->power.up_threshold)
1666 			events = GEN6_PM_RP_UP_THRESHOLD;
1667 		else if (c0 < time * rps->power.down_threshold)
1668 			events = GEN6_PM_RP_DOWN_THRESHOLD;
1669 	}
1670 
1671 	rps->ei = now;
1672 	return events;
1673 }
1674 
1675 static void rps_work(struct work_struct *work)
1676 {
1677 	struct intel_rps *rps = container_of(work, typeof(*rps), work);
1678 	struct intel_gt *gt = rps_to_gt(rps);
1679 	struct drm_i915_private *i915 = rps_to_i915(rps);
1680 	bool client_boost = false;
1681 	int new_freq, adj, min, max;
1682 	u32 pm_iir = 0;
1683 
1684 	spin_lock_irq(&gt->irq_lock);
1685 	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
1686 	client_boost = atomic_read(&rps->num_waiters);
1687 	spin_unlock_irq(&gt->irq_lock);
1688 
1689 	/* Make sure we didn't queue anything we're not going to process. */
1690 	if (!pm_iir && !client_boost)
1691 		goto out;
1692 
1693 	mutex_lock(&rps->lock);
1694 	if (!intel_rps_is_active(rps)) {
1695 		mutex_unlock(&rps->lock);
1696 		return;
1697 	}
1698 
1699 	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
1700 
1701 	adj = rps->last_adj;
1702 	new_freq = rps->cur_freq;
1703 	min = rps->min_freq_softlimit;
1704 	max = rps->max_freq_softlimit;
1705 	if (client_boost)
1706 		max = rps->max_freq;
1707 
1708 	GT_TRACE(gt,
1709 		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
1710 		 pm_iir, yesno(client_boost),
1711 		 adj, new_freq, min, max);
1712 
1713 	if (client_boost && new_freq < rps->boost_freq) {
1714 		new_freq = rps->boost_freq;
1715 		adj = 0;
1716 	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
1717 		if (adj > 0)
1718 			adj *= 2;
1719 		else /* CHV needs even encode values */
1720 			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
1721 
1722 		if (new_freq >= rps->max_freq_softlimit)
1723 			adj = 0;
1724 	} else if (client_boost) {
1725 		adj = 0;
1726 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
1727 		if (rps->cur_freq > rps->efficient_freq)
1728 			new_freq = rps->efficient_freq;
1729 		else if (rps->cur_freq > rps->min_freq_softlimit)
1730 			new_freq = rps->min_freq_softlimit;
1731 		adj = 0;
1732 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
1733 		if (adj < 0)
1734 			adj *= 2;
1735 		else /* CHV needs even encode values */
1736 			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
1737 
1738 		if (new_freq <= rps->min_freq_softlimit)
1739 			adj = 0;
1740 	} else { /* unknown event */
1741 		adj = 0;
1742 	}
1743 
1744 	/*
1745 	 * sysfs frequency limits may have snuck in while
1746 	 * servicing the interrupt
1747 	 */
1748 	new_freq += adj;
1749 	new_freq = clamp_t(int, new_freq, min, max);
1750 
1751 	if (intel_rps_set(rps, new_freq)) {
1752 		drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
1753 		adj = 0;
1754 	}
1755 	rps->last_adj = adj;
1756 
1757 	mutex_unlock(&rps->lock);
1758 
1759 out:
1760 	spin_lock_irq(&gt->irq_lock);
1761 	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
1762 	spin_unlock_irq(&gt->irq_lock);
1763 }
1764 
1765 void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1766 {
1767 	struct intel_gt *gt = rps_to_gt(rps);
1768 	const u32 events = rps->pm_events & pm_iir;
1769 
1770 	lockdep_assert_held(&gt->irq_lock);
1771 
1772 	if (unlikely(!events))
1773 		return;
1774 
1775 	GT_TRACE(gt, "irq events:%x\n", events);
1776 
1777 	gen6_gt_pm_mask_irq(gt, events);
1778 
1779 	rps->pm_iir |= events;
1780 	schedule_work(&rps->work);
1781 }
1782 
1783 void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
1784 {
1785 	struct intel_gt *gt = rps_to_gt(rps);
1786 	u32 events;
1787 
1788 	events = pm_iir & rps->pm_events;
1789 	if (events) {
1790 		spin_lock(&gt->irq_lock);
1791 
1792 		GT_TRACE(gt, "irq events:%x\n", events);
1793 
1794 		gen6_gt_pm_mask_irq(gt, events);
1795 		rps->pm_iir |= events;
1796 
1797 		schedule_work(&rps->work);
1798 		spin_unlock(&gt->irq_lock);
1799 	}
1800 
1801 	if (GRAPHICS_VER(gt->i915) >= 8)
1802 		return;
1803 
1804 	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
1805 		intel_engine_cs_irq(gt->engine[VECS0], pm_iir >> 10);
1806 
1807 	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
1808 		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
1809 }
1810 
1811 void gen5_rps_irq_handler(struct intel_rps *rps)
1812 {
1813 	struct intel_uncore *uncore = rps_to_uncore(rps);
1814 	u32 busy_up, busy_down, max_avg, min_avg;
1815 	u8 new_freq;
1816 
1817 	spin_lock(&mchdev_lock);
1818 
1819 	intel_uncore_write16(uncore,
1820 			     MEMINTRSTS,
1821 			     intel_uncore_read(uncore, MEMINTRSTS));
1822 
1823 	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
1824 	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
1825 	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
1826 	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
1827 	min_avg = intel_uncore_read(uncore, RCBMINAVG);
1828 
1829 	/* Handle RCS change request from hw */
1830 	new_freq = rps->cur_freq;
1831 	if (busy_up > max_avg)
1832 		new_freq++;
1833 	else if (busy_down < min_avg)
1834 		new_freq--;
1835 	new_freq = clamp(new_freq,
1836 			 rps->min_freq_softlimit,
1837 			 rps->max_freq_softlimit);
1838 
1839 	if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
1840 		rps->cur_freq = new_freq;
1841 
1842 	spin_unlock(&mchdev_lock);
1843 }
1844 
1845 void intel_rps_init_early(struct intel_rps *rps)
1846 {
1847 	rw_init(&rps->lock, "rpslk");
1848 	rw_init(&rps->power.mutex, "rpspwr");
1849 
1850 	INIT_WORK(&rps->work, rps_work);
1851 #ifdef __linux__
1852 	timer_setup(&rps->timer, rps_timer, 0);
1853 #else
1854 	timeout_set(&rps->timer, rps_timer, rps);
1855 #endif
1856 
1857 	atomic_set(&rps->num_waiters, 0);
1858 }
1859 
1860 void intel_rps_init(struct intel_rps *rps)
1861 {
1862 	struct drm_i915_private *i915 = rps_to_i915(rps);
1863 
1864 	if (rps_uses_slpc(rps))
1865 		return;
1866 
1867 	if (IS_CHERRYVIEW(i915))
1868 		chv_rps_init(rps);
1869 	else if (IS_VALLEYVIEW(i915))
1870 		vlv_rps_init(rps);
1871 	else if (GRAPHICS_VER(i915) >= 6)
1872 		gen6_rps_init(rps);
1873 	else if (IS_IRONLAKE_M(i915))
1874 		gen5_rps_init(rps);
1875 
1876 	/* Derive initial user preferences/limits from the hardware limits */
1877 	rps->max_freq_softlimit = rps->max_freq;
1878 	rps->min_freq_softlimit = rps->min_freq;
1879 
1880 	/* After setting max-softlimit, find the overclock max freq */
1881 	if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
1882 		u32 params = 0;
1883 
1884 		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
1885 				       &params, NULL);
1886 		if (params & BIT(31)) { /* OC supported */
1887 			drm_dbg(&i915->drm,
1888 				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
1889 				(rps->max_freq & 0xff) * 50,
1890 				(params & 0xff) * 50);
1891 			rps->max_freq = params & 0xff;
1892 		}
1893 	}
1894 
1895 	/* Finally allow us to boost to max by default */
1896 	rps->boost_freq = rps->max_freq;
1897 	rps->idle_freq = rps->min_freq;
1898 
1899 	/* Start in the middle; from here we will autotune based on workload */
1900 	rps->cur_freq = rps->efficient_freq;
1901 
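	/*
	 * pm_intrmsk_mbz collects the PMINTRMSK bits that must always be
	 * written as zero (i.e. left unmasked) on this platform.
	 */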
1902 	rps->pm_intrmsk_mbz = 0;
1903 
1904 	/*
1905 	 * SNB, IVB and HSW can hard hang (and VLV, CHV may) on a looping
1906 	 * batchbuffer if GEN6_PM_RP_UP_EI_EXPIRED is masked.
1907 	 *
1908 	 * TODO: verify if this can be reproduced on VLV,CHV.
1909 	 */
1910 	if (GRAPHICS_VER(i915) <= 7)
1911 		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
1912 
1913 	if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) < 11)
1914 		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
1915 
1916 	/* GuC needs ARAT expired interrupt unmasked */
1917 	if (intel_uc_uses_guc_submission(&rps_to_gt(rps)->uc))
1918 		rps->pm_intrmsk_mbz |= ARAT_EXPIRED_INTRMSK;
1919 }
1920 
1921 void intel_rps_sanitize(struct intel_rps *rps)
1922 {
1923 	if (rps_uses_slpc(rps))
1924 		return;
1925 
1926 	if (GRAPHICS_VER(rps_to_i915(rps)) >= 6)
1927 		rps_disable_interrupts(rps);
1928 }
1929 
1930 u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
1931 {
1932 	struct drm_i915_private *i915 = rps_to_i915(rps);
1933 	u32 cagf;
1934 
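	/*
	 * CAGF is the "current actual GPU frequency" field of the RPSTAT
	 * register; its position varies between generations.
	 */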
1935 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
1936 		cagf = (rpstat >> 8) & 0xff;
1937 	else if (GRAPHICS_VER(i915) >= 9)
1938 		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
1939 	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
1940 		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
1941 	else if (GRAPHICS_VER(i915) >= 6)
1942 		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
1943 	else
1944 		cagf = gen5_invert_freq(rps, (rpstat & MEMSTAT_PSTATE_MASK) >>
1945 					MEMSTAT_PSTATE_SHIFT);
1946 
1947 	return cagf;
1948 }
1949 
1950 static u32 read_cagf(struct intel_rps *rps)
1951 {
1952 	struct drm_i915_private *i915 = rps_to_i915(rps);
1953 	struct intel_uncore *uncore = rps_to_uncore(rps);
1954 	u32 freq;
1955 
1956 	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
1957 		vlv_punit_get(i915);
1958 		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
1959 		vlv_punit_put(i915);
1960 	} else if (GRAPHICS_VER(i915) >= 6) {
1961 		freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
1962 	} else {
1963 		freq = intel_uncore_read(uncore, MEMSTAT_ILK);
1964 	}
1965 
1966 	return intel_rps_get_cagf(rps, freq);
1967 }
1968 
1969 u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
1970 {
1971 	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
1972 	intel_wakeref_t wakeref;
1973 	u32 freq = 0;
1974 
1975 	with_intel_runtime_pm_if_in_use(rpm, wakeref)
1976 		freq = intel_gpu_freq(rps, read_cagf(rps));
1977 
1978 	return freq;
1979 }
1980 
1981 u32 intel_rps_read_punit_req(struct intel_rps *rps)
1982 {
1983 	struct intel_uncore *uncore = rps_to_uncore(rps);
1984 	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
1985 	intel_wakeref_t wakeref;
1986 	u32 freq = 0;
1987 
1988 	with_intel_runtime_pm_if_in_use(rpm, wakeref)
1989 		freq = intel_uncore_read(uncore, GEN6_RPNSWREQ);
1990 
1991 	return freq;
1992 }
1993 
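/*
 * The RPNSWREQ value keeps the requested frequency ratio in its upper
 * bits; shift it down to the plain ratio that intel_gpu_freq() expects.
 */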
1994 static u32 intel_rps_get_req(u32 pureq)
1995 {
1996 	u32 req = pureq >> GEN9_SW_REQ_UNSLICE_RATIO_SHIFT;
1997 
1998 	return req;
1999 }
2000 
2001 u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps)
2002 {
2003 	u32 freq = intel_rps_get_req(intel_rps_read_punit_req(rps));
2004 
2005 	return intel_gpu_freq(rps, freq);
2006 }
2007 
2008 u32 intel_rps_get_requested_frequency(struct intel_rps *rps)
2009 {
2010 	if (rps_uses_slpc(rps))
2011 		return intel_rps_read_punit_req_frequency(rps);
2012 	else
2013 		return intel_gpu_freq(rps, rps->cur_freq);
2014 }
2015 
2016 u32 intel_rps_get_max_frequency(struct intel_rps *rps)
2017 {
2018 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2019 
2020 	if (rps_uses_slpc(rps))
2021 		return slpc->max_freq_softlimit;
2022 	else
2023 		return intel_gpu_freq(rps, rps->max_freq_softlimit);
2024 }
2025 
2026 u32 intel_rps_get_rp0_frequency(struct intel_rps *rps)
2027 {
2028 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2029 
2030 	if (rps_uses_slpc(rps))
2031 		return slpc->rp0_freq;
2032 	else
2033 		return intel_gpu_freq(rps, rps->rp0_freq);
2034 }
2035 
2036 u32 intel_rps_get_rp1_frequency(struct intel_rps *rps)
2037 {
2038 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2039 
2040 	if (rps_uses_slpc(rps))
2041 		return slpc->rp1_freq;
2042 	else
2043 		return intel_gpu_freq(rps, rps->rp1_freq);
2044 }
2045 
2046 u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
2047 {
2048 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2049 
2050 	if (rps_uses_slpc(rps))
2051 		return slpc->min_freq;
2052 	else
2053 		return intel_gpu_freq(rps, rps->min_freq);
2054 }
2055 
2056 static int set_max_freq(struct intel_rps *rps, u32 val)
2057 {
2058 	struct drm_i915_private *i915 = rps_to_i915(rps);
2059 	int ret = 0;
2060 
2061 	mutex_lock(&rps->lock);
2062 
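	/* Convert the MHz value from userspace into the hw frequency encoding */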
2063 	val = intel_freq_opcode(rps, val);
2064 	if (val < rps->min_freq ||
2065 	    val > rps->max_freq ||
2066 	    val < rps->min_freq_softlimit) {
2067 		ret = -EINVAL;
2068 		goto unlock;
2069 	}
2070 
2071 	if (val > rps->rp0_freq)
2072 		drm_dbg(&i915->drm, "User requested overclocking to %d\n",
2073 			intel_gpu_freq(rps, val));
2074 
2075 	rps->max_freq_softlimit = val;
2076 
2077 	val = clamp_t(int, rps->cur_freq,
2078 		      rps->min_freq_softlimit,
2079 		      rps->max_freq_softlimit);
2080 
2081 	/*
2082 	 * We still need *_set_rps to process the new max_freq_softlimit and
2083 	 * update the interrupt limits and PMINTRMSK even though the
2084 	 * frequency request may be unchanged.
2085 	 */
2086 	intel_rps_set(rps, val);
2087 
2088 unlock:
2089 	mutex_unlock(&rps->lock);
2090 
2091 	return ret;
2092 }
2093 
2094 int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val)
2095 {
2096 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2097 
2098 	if (rps_uses_slpc(rps))
2099 		return intel_guc_slpc_set_max_freq(slpc, val);
2100 	else
2101 		return set_max_freq(rps, val);
2102 }
2103 
2104 u32 intel_rps_get_min_frequency(struct intel_rps *rps)
2105 {
2106 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2107 
2108 	if (rps_uses_slpc(rps))
2109 		return slpc->min_freq_softlimit;
2110 	else
2111 		return intel_gpu_freq(rps, rps->min_freq_softlimit);
2112 }
2113 
2114 static int set_min_freq(struct intel_rps *rps, u32 val)
2115 {
2116 	int ret = 0;
2117 
2118 	mutex_lock(&rps->lock);
2119 
2120 	val = intel_freq_opcode(rps, val);
2121 	if (val < rps->min_freq ||
2122 	    val > rps->max_freq ||
2123 	    val > rps->max_freq_softlimit) {
2124 		ret = -EINVAL;
2125 		goto unlock;
2126 	}
2127 
2128 	rps->min_freq_softlimit = val;
2129 
2130 	val = clamp_t(int, rps->cur_freq,
2131 		      rps->min_freq_softlimit,
2132 		      rps->max_freq_softlimit);
2133 
2134 	/*
2135 	 * We still need *_set_rps to process the new min_freq_softlimit and
2136 	 * update the interrupt limits and PMINTRMSK even though the
2137 	 * frequency request may be unchanged.
2138 	 */
2139 	intel_rps_set(rps, val);
2140 
2141 unlock:
2142 	mutex_unlock(&rps->lock);
2143 
2144 	return ret;
2145 }
2146 
2147 int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val)
2148 {
2149 	struct intel_guc_slpc *slpc = rps_to_slpc(rps);
2150 
2151 	if (rps_uses_slpc(rps))
2152 		return intel_guc_slpc_set_min_freq(slpc, val);
2153 	else
2154 		return set_min_freq(rps, val);
2155 }
2156 
2157 /* External interface for intel_ips.ko */
2158 
2159 static struct drm_i915_private __rcu *ips_mchdev;
2160 
2161 /**
2162  * ips_ping_for_i915_load - tell the intel_ips driver that the i915
2163  * driver is now loaded, if IPS got loaded first.
2164  *
2165  * This awkward dance is so that neither module has to depend on the
2166  * other in order for IPS to do the appropriate communication of
2167  * GPU turbo limits to i915.
2168  */
2169 static void
2170 ips_ping_for_i915_load(void)
2171 {
2172 #ifdef __linux__
2173 	void (*link)(void);
2174 
2175 	link = symbol_get(ips_link_to_i915_driver);
2176 	if (link) {
2177 		link();
2178 		symbol_put(ips_link_to_i915_driver);
2179 	}
2180 #endif
2181 }
2182 
2183 void intel_rps_driver_register(struct intel_rps *rps)
2184 {
2185 	struct intel_gt *gt = rps_to_gt(rps);
2186 
2187 	/*
2188 	 * We only register the i915 ips part with intel-ips once everything is
2189 	 * set up, to avoid intel-ips sneaking in and reading bogus values.
2190 	 */
2191 	if (GRAPHICS_VER(gt->i915) == 5) {
2192 		GEM_BUG_ON(ips_mchdev);
2193 		rcu_assign_pointer(ips_mchdev, gt->i915);
2194 		ips_ping_for_i915_load();
2195 	}
2196 }
2197 
2198 void intel_rps_driver_unregister(struct intel_rps *rps)
2199 {
2200 	if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
2201 		rcu_assign_pointer(ips_mchdev, NULL);
2202 }
2203 
2204 static struct drm_i915_private *mchdev_get(void)
2205 {
2206 	struct drm_i915_private *i915;
2207 
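	/*
	 * Take a reference under the RCU read lock so the device cannot be
	 * released while IPS is using it; callers drop it with drm_dev_put().
	 */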
2208 	rcu_read_lock();
2209 	i915 = rcu_dereference(ips_mchdev);
2210 	if (i915 && !kref_get_unless_zero(&i915->drm.ref))
2211 		i915 = NULL;
2212 	rcu_read_unlock();
2213 
2214 	return i915;
2215 }
2216 
2217 /**
2218  * i915_read_mch_val - return value for IPS use
2219  *
2220  * Calculate and return a value for the IPS driver to use when deciding whether
2221  * we have thermal and power headroom to increase CPU or GPU power budget.
2222  */
2223 unsigned long i915_read_mch_val(void)
2224 {
2225 	struct drm_i915_private *i915;
2226 	unsigned long chipset_val = 0;
2227 	unsigned long graphics_val = 0;
2228 	intel_wakeref_t wakeref;
2229 
2230 	i915 = mchdev_get();
2231 	if (!i915)
2232 		return 0;
2233 
2234 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
2235 		struct intel_ips *ips = &i915->gt.rps.ips;
2236 
2237 		spin_lock_irq(&mchdev_lock);
2238 		chipset_val = __ips_chipset_val(ips);
2239 		graphics_val = __ips_gfx_val(ips);
2240 		spin_unlock_irq(&mchdev_lock);
2241 	}
2242 
2243 	drm_dev_put(&i915->drm);
2244 	return chipset_val + graphics_val;
2245 }
2246 EXPORT_SYMBOL_GPL(i915_read_mch_val);
2247 
2248 /**
2249  * i915_gpu_raise - raise GPU frequency limit
2250  *
2251  * Raise the limit; IPS indicates we have thermal headroom.
2252  */
2253 bool i915_gpu_raise(void)
2254 {
2255 	struct drm_i915_private *i915;
2256 	struct intel_rps *rps;
2257 
2258 	i915 = mchdev_get();
2259 	if (!i915)
2260 		return false;
2261 
2262 	rps = &i915->gt.rps;
2263 
2264 	spin_lock_irq(&mchdev_lock);
2265 	if (rps->max_freq_softlimit < rps->max_freq)
2266 		rps->max_freq_softlimit++;
2267 	spin_unlock_irq(&mchdev_lock);
2268 
2269 	drm_dev_put(&i915->drm);
2270 	return true;
2271 }
2272 EXPORT_SYMBOL_GPL(i915_gpu_raise);
2273 
2274 /**
2275  * i915_gpu_lower - lower GPU frequency limit
2276  *
2277  * IPS indicates we're close to a thermal limit, so throttle back the GPU
2278  * frequency maximum.
2279  */
2280 bool i915_gpu_lower(void)
2281 {
2282 	struct drm_i915_private *i915;
2283 	struct intel_rps *rps;
2284 
2285 	i915 = mchdev_get();
2286 	if (!i915)
2287 		return false;
2288 
2289 	rps = &i915->gt.rps;
2290 
2291 	spin_lock_irq(&mchdev_lock);
2292 	if (rps->max_freq_softlimit > rps->min_freq)
2293 		rps->max_freq_softlimit--;
2294 	spin_unlock_irq(&mchdev_lock);
2295 
2296 	drm_dev_put(&i915->drm);
2297 	return true;
2298 }
2299 EXPORT_SYMBOL_GPL(i915_gpu_lower);
2300 
2301 /**
2302  * i915_gpu_busy - indicate GPU busyness to IPS
2303  *
2304  * Tell the IPS driver whether or not the GPU is busy.
2305  */
2306 bool i915_gpu_busy(void)
2307 {
2308 	struct drm_i915_private *i915;
2309 	bool ret;
2310 
2311 	i915 = mchdev_get();
2312 	if (!i915)
2313 		return false;
2314 
2315 	ret = i915->gt.awake;
2316 
2317 	drm_dev_put(&i915->drm);
2318 	return ret;
2319 }
2320 EXPORT_SYMBOL_GPL(i915_gpu_busy);
2321 
2322 /**
2323  * i915_gpu_turbo_disable - disable graphics turbo
2324  *
2325  * Disable graphics turbo by resetting the max frequency and setting the
2326  * current frequency to the default.
2327  */
2328 bool i915_gpu_turbo_disable(void)
2329 {
2330 	struct drm_i915_private *i915;
2331 	struct intel_rps *rps;
2332 	bool ret;
2333 
2334 	i915 = mchdev_get();
2335 	if (!i915)
2336 		return false;
2337 
2338 	rps = &i915->gt.rps;
2339 
2340 	spin_lock_irq(&mchdev_lock);
2341 	rps->max_freq_softlimit = rps->min_freq;
2342 	ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
2343 	spin_unlock_irq(&mchdev_lock);
2344 
2345 	drm_dev_put(&i915->drm);
2346 	return ret;
2347 }
2348 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
2349 
2350 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2351 #include "selftest_rps.c"
2352 #include "selftest_slpc.c"
2353 #endif
2354