/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"
#include "intel_gt_pm_irq.h"
#include "intel_rps.h"
#include "intel_sideband.h"
#ifdef __linux__
#include "../../../platform/x86/intel_ips.h"
#endif

#define BUSY_MAX_EI	20u /* ms */

/*
 * Lock protecting IPS related data structures
 */
static DEFINE_SPINLOCK(mchdev_lock);

static struct intel_gt *rps_to_gt(struct intel_rps *rps)
{
	return container_of(rps, struct intel_gt, rps);
}

static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
{
	return rps_to_gt(rps)->i915;
}

static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
{
	return rps_to_gt(rps)->uncore;
}

static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
{
	return mask & ~rps->pm_intrmsk_mbz;
}

static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
{
	intel_uncore_write_fw(uncore, reg, val);
}

static void rps_timer(void *arg)
{
	struct intel_rps *rps = arg;
	struct intel_engine_cs *engine;
	ktime_t dt, last, timestamp;
	enum intel_engine_id id;
	s64 max_busy[3] = {};

	timestamp = 0;
	for_each_engine(engine, rps_to_gt(rps), id) {
		s64 busy;
		int i;

		dt = intel_engine_get_busy_time(engine, &timestamp);
		last = engine->stats.rps;
		engine->stats.rps = dt;

		busy = ktime_to_ns(ktime_sub(dt, last));
		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
			if (busy > max_busy[i])
				swap(busy, max_busy[i]);
		}
	}
	last = rps->pm_timestamp;
	rps->pm_timestamp = timestamp;

	if (intel_rps_is_active(rps)) {
		s64 busy;
		int i;

		dt = ktime_sub(timestamp, last);

		/*
		 * Our goal is to evaluate each engine independently, so we run
		 * at the lowest clocks required to sustain the heaviest
		 * workload. However, a task may be split into sequential
		 * dependent operations across a set of engines, such that
		 * the independent contributions do not account for high load,
		 * but overall the task is GPU bound. For example, consider
		 * video decode on vcs followed by colour post-processing
		 * on vecs, followed by general post-processing on rcs.
		 * Since multiple engines being active need not imply a single
		 * continuous workload across all engines, we hedge our
		 * bets by only contributing a factor of the distributed
		 * load into our busyness calculation.
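		 *
		 * (Worked example: per-engine busyness of 8ms, 4ms and 2ms in
		 * one interval contributes 8 + 4/2 + 2/4 = 10.5ms of weighted
		 * busyness below; illustrative numbers only.)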
		 */
		busy = max_busy[0];
		for (i = 1; i < ARRAY_SIZE(max_busy); i++) {
			if (!max_busy[i])
				break;

			busy += div_u64(max_busy[i], 1 << i);
		}
		GT_TRACE(rps_to_gt(rps),
			 "busy:%lld [%d%%], max:[%lld, %lld, %lld], interval:%d\n",
			 busy, (int)div64_u64(100 * busy, dt),
			 max_busy[0], max_busy[1], max_busy[2],
			 rps->pm_interval);

		if (100 * busy > rps->power.up_threshold * dt &&
		    rps->cur_freq < rps->max_freq_softlimit) {
			rps->pm_iir |= GEN6_PM_RP_UP_THRESHOLD;
			rps->pm_interval = 1;
			schedule_work(&rps->work);
		} else if (100 * busy < rps->power.down_threshold * dt &&
			   rps->cur_freq > rps->min_freq_softlimit) {
			rps->pm_iir |= GEN6_PM_RP_DOWN_THRESHOLD;
			rps->pm_interval = 1;
			schedule_work(&rps->work);
		} else {
			rps->last_adj = 0;
		}

		mod_timer(&rps->timer,
			  jiffies + msecs_to_jiffies(rps->pm_interval));
		rps->pm_interval = min(rps->pm_interval * 2, BUSY_MAX_EI);
	}
}

static void rps_start_timer(struct intel_rps *rps)
{
	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
	rps->pm_interval = 1;
	mod_timer(&rps->timer, jiffies + 1);
}

static void rps_stop_timer(struct intel_rps *rps)
{
	del_timer_sync(&rps->timer);
	rps->pm_timestamp = ktime_sub(ktime_get(), rps->pm_timestamp);
	cancel_work_sync(&rps->work);
}

static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
{
	u32 mask = 0;

	/* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
	if (val > rps->min_freq_softlimit)
		mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
			 GEN6_PM_RP_DOWN_THRESHOLD |
			 GEN6_PM_RP_DOWN_TIMEOUT);

	if (val < rps->max_freq_softlimit)
		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;

	mask &= rps->pm_events;

	return rps_pm_sanitize_mask(rps, ~mask);
}

static void rps_reset_ei(struct intel_rps *rps)
{
	memset(&rps->ei, 0, sizeof(rps->ei));
}

static void rps_enable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	GT_TRACE(gt, "interrupts:on rps->pm_events: %x, rps_pm_mask:%x\n",
		 rps->pm_events, rps_pm_mask(rps, rps->last_freq));

	rps_reset_ei(rps);

	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_enable_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);

	intel_uncore_write(gt->uncore,
			   GEN6_PMINTRMSK, rps_pm_mask(rps, rps->last_freq));
}

static void gen6_rps_reset_interrupts(struct intel_rps *rps)
{
	gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
}

static void gen11_rps_reset_interrupts(struct intel_rps *rps)
{
	while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
		;
}

static void rps_reset_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	spin_lock_irq(&gt->irq_lock);
	if (INTEL_GEN(gt->i915) >= 11)
		gen11_rps_reset_interrupts(rps);
	else
		gen6_rps_reset_interrupts(rps);

	rps->pm_iir = 0;
	spin_unlock_irq(&gt->irq_lock);
}

static void rps_disable_interrupts(struct intel_rps *rps)
{
	struct intel_gt *gt = rps_to_gt(rps);

	intel_uncore_write(gt->uncore,
			   GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));

	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
	spin_unlock_irq(&gt->irq_lock);

	intel_synchronize_irq(gt->i915);

	/*
	 * Now that we will not be generating any more work, flush
any 226 * outstanding tasks. As we are called on the RPS idle path, 227 * we will reset the GPU to minimum frequencies, so the current 228 * state of the worker can be discarded. 229 */ 230 cancel_work_sync(&rps->work); 231 232 rps_reset_interrupts(rps); 233 GT_TRACE(gt, "interrupts:off\n"); 234 } 235 236 static const struct cparams { 237 u16 i; 238 u16 t; 239 u16 m; 240 u16 c; 241 } cparams[] = { 242 { 1, 1333, 301, 28664 }, 243 { 1, 1066, 294, 24460 }, 244 { 1, 800, 294, 25192 }, 245 { 0, 1333, 276, 27605 }, 246 { 0, 1066, 276, 27605 }, 247 { 0, 800, 231, 23784 }, 248 }; 249 250 static void gen5_rps_init(struct intel_rps *rps) 251 { 252 struct drm_i915_private *i915 = rps_to_i915(rps); 253 struct intel_uncore *uncore = rps_to_uncore(rps); 254 u8 fmax, fmin, fstart; 255 u32 rgvmodectl; 256 int c_m, i; 257 258 if (i915->fsb_freq <= 3200) 259 c_m = 0; 260 else if (i915->fsb_freq <= 4800) 261 c_m = 1; 262 else 263 c_m = 2; 264 265 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 266 if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) { 267 rps->ips.m = cparams[i].m; 268 rps->ips.c = cparams[i].c; 269 break; 270 } 271 } 272 273 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 274 275 /* Set up min, max, and cur for interrupt handling */ 276 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 277 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 278 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 279 MEMMODE_FSTART_SHIFT; 280 drm_dbg(&i915->drm, "fmax: %d, fmin: %d, fstart: %d\n", 281 fmax, fmin, fstart); 282 283 rps->min_freq = fmax; 284 rps->efficient_freq = fstart; 285 rps->max_freq = fmin; 286 } 287 288 static unsigned long 289 __ips_chipset_val(struct intel_ips *ips) 290 { 291 struct intel_uncore *uncore = 292 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 293 unsigned long now = jiffies_to_msecs(jiffies), dt; 294 unsigned long result; 295 u64 total, delta; 296 297 lockdep_assert_held(&mchdev_lock); 298 299 /* 300 * Prevent division-by-zero if we are asking too fast. 301 * Also, we don't get interesting results if we are polling 302 * faster than once in 10ms, so just return the saved value 303 * in such cases. 
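	 *
	 * (For reference, the result computed below is
	 * (m * delta / dt + c) / 10, using the m/c coefficients picked from
	 * cparams[] in gen5_rps_init(); e.g. with m=301, c=28664 and a
	 * delta/dt of 100 counts/ms this yields roughly 5876. Illustrative
	 * numbers only.)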
304 */ 305 dt = now - ips->last_time1; 306 if (dt <= 10) 307 return ips->chipset_power; 308 309 /* FIXME: handle per-counter overflow */ 310 total = intel_uncore_read(uncore, DMIEC); 311 total += intel_uncore_read(uncore, DDREC); 312 total += intel_uncore_read(uncore, CSIEC); 313 314 delta = total - ips->last_count1; 315 316 result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10); 317 318 ips->last_count1 = total; 319 ips->last_time1 = now; 320 321 ips->chipset_power = result; 322 323 return result; 324 } 325 326 static unsigned long ips_mch_val(struct intel_uncore *uncore) 327 { 328 unsigned int m, x, b; 329 u32 tsfs; 330 331 tsfs = intel_uncore_read(uncore, TSFS); 332 x = intel_uncore_read8(uncore, TR1); 333 334 b = tsfs & TSFS_INTR_MASK; 335 m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT; 336 337 return m * x / 127 - b; 338 } 339 340 static int _pxvid_to_vd(u8 pxvid) 341 { 342 if (pxvid == 0) 343 return 0; 344 345 if (pxvid >= 8 && pxvid < 31) 346 pxvid = 31; 347 348 return (pxvid + 2) * 125; 349 } 350 351 static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid) 352 { 353 const int vd = _pxvid_to_vd(pxvid); 354 355 if (INTEL_INFO(i915)->is_mobile) 356 return max(vd - 1125, 0); 357 358 return vd; 359 } 360 361 static void __gen5_ips_update(struct intel_ips *ips) 362 { 363 struct intel_uncore *uncore = 364 rps_to_uncore(container_of(ips, struct intel_rps, ips)); 365 u64 now, delta, dt; 366 u32 count; 367 368 lockdep_assert_held(&mchdev_lock); 369 370 now = ktime_get_raw_ns(); 371 dt = now - ips->last_time2; 372 do_div(dt, NSEC_PER_MSEC); 373 374 /* Don't divide by 0 */ 375 if (dt <= 10) 376 return; 377 378 count = intel_uncore_read(uncore, GFXEC); 379 delta = count - ips->last_count2; 380 381 ips->last_count2 = count; 382 ips->last_time2 = now; 383 384 /* More magic constants... 
*/ 385 ips->gfx_power = div_u64(delta * 1181, dt * 10); 386 } 387 388 static void gen5_rps_update(struct intel_rps *rps) 389 { 390 spin_lock_irq(&mchdev_lock); 391 __gen5_ips_update(&rps->ips); 392 spin_unlock_irq(&mchdev_lock); 393 } 394 395 static bool gen5_rps_set(struct intel_rps *rps, u8 val) 396 { 397 struct intel_uncore *uncore = rps_to_uncore(rps); 398 u16 rgvswctl; 399 400 lockdep_assert_held(&mchdev_lock); 401 402 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 403 if (rgvswctl & MEMCTL_CMD_STS) { 404 DRM_DEBUG("gpu busy, RCS change rejected\n"); 405 return false; /* still busy with another command */ 406 } 407 408 /* Invert the frequency bin into an ips delay */ 409 val = rps->max_freq - val; 410 val = rps->min_freq + val; 411 412 rgvswctl = 413 (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 414 (val << MEMCTL_FREQ_SHIFT) | 415 MEMCTL_SFCAVM; 416 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 417 intel_uncore_posting_read16(uncore, MEMSWCTL); 418 419 rgvswctl |= MEMCTL_CMD_STS; 420 intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); 421 422 return true; 423 } 424 425 static unsigned long intel_pxfreq(u32 vidfreq) 426 { 427 int div = (vidfreq & 0x3f0000) >> 16; 428 int post = (vidfreq & 0x3000) >> 12; 429 int pre = (vidfreq & 0x7); 430 431 if (!pre) 432 return 0; 433 434 return div * 133333 / (pre << post); 435 } 436 437 static unsigned int init_emon(struct intel_uncore *uncore) 438 { 439 u8 pxw[16]; 440 int i; 441 442 /* Disable to program */ 443 intel_uncore_write(uncore, ECR, 0); 444 intel_uncore_posting_read(uncore, ECR); 445 446 /* Program energy weights for various events */ 447 intel_uncore_write(uncore, SDEW, 0x15040d00); 448 intel_uncore_write(uncore, CSIEW0, 0x007f0000); 449 intel_uncore_write(uncore, CSIEW1, 0x1e220004); 450 intel_uncore_write(uncore, CSIEW2, 0x04000004); 451 452 for (i = 0; i < 5; i++) 453 intel_uncore_write(uncore, PEW(i), 0); 454 for (i = 0; i < 3; i++) 455 intel_uncore_write(uncore, DEW(i), 0); 456 457 /* Program P-state weights to account for frequency power adjustment */ 458 for (i = 0; i < 16; i++) { 459 u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i)); 460 unsigned int freq = intel_pxfreq(pxvidfreq); 461 unsigned int vid = 462 (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 463 unsigned int val; 464 465 val = vid * vid * freq / 1000 * 255; 466 val /= 127 * 127 * 900; 467 468 pxw[i] = val; 469 } 470 /* Render standby states get 0 weight */ 471 pxw[14] = 0; 472 pxw[15] = 0; 473 474 for (i = 0; i < 4; i++) { 475 intel_uncore_write(uncore, PXW(i), 476 pxw[i * 4 + 0] << 24 | 477 pxw[i * 4 + 1] << 16 | 478 pxw[i * 4 + 2] << 8 | 479 pxw[i * 4 + 3] << 0); 480 } 481 482 /* Adjust magic regs to magic values (more experimental results) */ 483 intel_uncore_write(uncore, OGW0, 0); 484 intel_uncore_write(uncore, OGW1, 0); 485 intel_uncore_write(uncore, EG0, 0x00007f00); 486 intel_uncore_write(uncore, EG1, 0x0000000e); 487 intel_uncore_write(uncore, EG2, 0x000e0000); 488 intel_uncore_write(uncore, EG3, 0x68000300); 489 intel_uncore_write(uncore, EG4, 0x42000000); 490 intel_uncore_write(uncore, EG5, 0x00140031); 491 intel_uncore_write(uncore, EG6, 0); 492 intel_uncore_write(uncore, EG7, 0); 493 494 for (i = 0; i < 8; i++) 495 intel_uncore_write(uncore, PXWL(i), 0); 496 497 /* Enable PMON + select events */ 498 intel_uncore_write(uncore, ECR, 0x80000019); 499 500 return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK; 501 } 502 503 static bool gen5_rps_enable(struct intel_rps *rps) 504 { 505 struct intel_uncore *uncore = rps_to_uncore(rps); 506 u8 
fstart, vstart; 507 u32 rgvmodectl; 508 509 spin_lock_irq(&mchdev_lock); 510 511 rgvmodectl = intel_uncore_read(uncore, MEMMODECTL); 512 513 /* Enable temp reporting */ 514 intel_uncore_write16(uncore, PMMISC, 515 intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN); 516 intel_uncore_write16(uncore, TSC1, 517 intel_uncore_read16(uncore, TSC1) | TSE); 518 519 /* 100ms RC evaluation intervals */ 520 intel_uncore_write(uncore, RCUPEI, 100000); 521 intel_uncore_write(uncore, RCDNEI, 100000); 522 523 /* Set max/min thresholds to 90ms and 80ms respectively */ 524 intel_uncore_write(uncore, RCBMAXAVG, 90000); 525 intel_uncore_write(uncore, RCBMINAVG, 80000); 526 527 intel_uncore_write(uncore, MEMIHYST, 1); 528 529 /* Set up min, max, and cur for interrupt handling */ 530 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 531 MEMMODE_FSTART_SHIFT; 532 533 vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) & 534 PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; 535 536 intel_uncore_write(uncore, 537 MEMINTREN, 538 MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 539 540 intel_uncore_write(uncore, VIDSTART, vstart); 541 intel_uncore_posting_read(uncore, VIDSTART); 542 543 rgvmodectl |= MEMMODE_SWMODE_EN; 544 intel_uncore_write(uncore, MEMMODECTL, rgvmodectl); 545 546 if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) & 547 MEMCTL_CMD_STS) == 0, 10)) 548 drm_err(&uncore->i915->drm, 549 "stuck trying to change perf mode\n"); 550 mdelay(1); 551 552 gen5_rps_set(rps, rps->cur_freq); 553 554 rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); 555 rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); 556 rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC); 557 rps->ips.last_time1 = jiffies_to_msecs(jiffies); 558 559 rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC); 560 rps->ips.last_time2 = ktime_get_raw_ns(); 561 562 spin_unlock_irq(&mchdev_lock); 563 564 rps->ips.corr = init_emon(uncore); 565 566 return true; 567 } 568 569 static void gen5_rps_disable(struct intel_rps *rps) 570 { 571 struct intel_uncore *uncore = rps_to_uncore(rps); 572 u16 rgvswctl; 573 574 spin_lock_irq(&mchdev_lock); 575 576 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); 577 578 /* Ack interrupts, disable EFC interrupt */ 579 intel_uncore_write(uncore, MEMINTREN, 580 intel_uncore_read(uncore, MEMINTREN) & 581 ~MEMINT_EVAL_CHG_EN); 582 intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); 583 intel_uncore_write(uncore, DEIER, 584 intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT); 585 intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT); 586 intel_uncore_write(uncore, DEIMR, 587 intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT); 588 589 /* Go back to the starting frequency */ 590 gen5_rps_set(rps, rps->idle_freq); 591 mdelay(1); 592 rgvswctl |= MEMCTL_CMD_STS; 593 intel_uncore_write(uncore, MEMSWCTL, rgvswctl); 594 mdelay(1); 595 596 spin_unlock_irq(&mchdev_lock); 597 } 598 599 static u32 rps_limits(struct intel_rps *rps, u8 val) 600 { 601 u32 limits; 602 603 /* 604 * Only set the down limit when we've reached the lowest level to avoid 605 * getting more interrupts, otherwise leave this clear. This prevents a 606 * race in the hw when coming out of rc6: There's a tiny window where 607 * the hw runs at the minimal clock before selecting the desired 608 * frequency, if the down threshold expires in that window we will not 609 * receive a down interrupt. 
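	 *
	 * (The value built here is later written to GEN6_RP_INTERRUPT_LIMITS
	 * by intel_rps_set(): the max softlimit sits in the upper field and,
	 * only at the lowest level, the min softlimit in the lower field,
	 * with the field shifts differing between gen9+ and earlier parts as
	 * below.)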
610 */ 611 if (INTEL_GEN(rps_to_i915(rps)) >= 9) { 612 limits = rps->max_freq_softlimit << 23; 613 if (val <= rps->min_freq_softlimit) 614 limits |= rps->min_freq_softlimit << 14; 615 } else { 616 limits = rps->max_freq_softlimit << 24; 617 if (val <= rps->min_freq_softlimit) 618 limits |= rps->min_freq_softlimit << 16; 619 } 620 621 return limits; 622 } 623 624 static void rps_set_power(struct intel_rps *rps, int new_power) 625 { 626 struct intel_gt *gt = rps_to_gt(rps); 627 struct intel_uncore *uncore = gt->uncore; 628 u32 threshold_up = 0, threshold_down = 0; /* in % */ 629 u32 ei_up = 0, ei_down = 0; 630 631 lockdep_assert_held(&rps->power.mutex); 632 633 if (new_power == rps->power.mode) 634 return; 635 636 threshold_up = 95; 637 threshold_down = 85; 638 639 /* Note the units here are not exactly 1us, but 1280ns. */ 640 switch (new_power) { 641 case LOW_POWER: 642 ei_up = 16000; 643 ei_down = 32000; 644 break; 645 646 case BETWEEN: 647 ei_up = 13000; 648 ei_down = 32000; 649 break; 650 651 case HIGH_POWER: 652 ei_up = 10000; 653 ei_down = 32000; 654 break; 655 } 656 657 /* When byt can survive without system hang with dynamic 658 * sw freq adjustments, this restriction can be lifted. 659 */ 660 if (IS_VALLEYVIEW(gt->i915)) 661 goto skip_hw_write; 662 663 GT_TRACE(gt, 664 "changing power mode [%d], up %d%% @ %dus, down %d%% @ %dus\n", 665 new_power, threshold_up, ei_up, threshold_down, ei_down); 666 667 set(uncore, GEN6_RP_UP_EI, 668 intel_gt_ns_to_pm_interval(gt, ei_up * 1000)); 669 set(uncore, GEN6_RP_UP_THRESHOLD, 670 intel_gt_ns_to_pm_interval(gt, ei_up * threshold_up * 10)); 671 672 set(uncore, GEN6_RP_DOWN_EI, 673 intel_gt_ns_to_pm_interval(gt, ei_down * 1000)); 674 set(uncore, GEN6_RP_DOWN_THRESHOLD, 675 intel_gt_ns_to_pm_interval(gt, ei_down * threshold_down * 10)); 676 677 set(uncore, GEN6_RP_CONTROL, 678 (INTEL_GEN(gt->i915) > 9 ? 
0 : GEN6_RP_MEDIA_TURBO) | 679 GEN6_RP_MEDIA_HW_NORMAL_MODE | 680 GEN6_RP_MEDIA_IS_GFX | 681 GEN6_RP_ENABLE | 682 GEN6_RP_UP_BUSY_AVG | 683 GEN6_RP_DOWN_IDLE_AVG); 684 685 skip_hw_write: 686 rps->power.mode = new_power; 687 rps->power.up_threshold = threshold_up; 688 rps->power.down_threshold = threshold_down; 689 } 690 691 static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val) 692 { 693 int new_power; 694 695 new_power = rps->power.mode; 696 switch (rps->power.mode) { 697 case LOW_POWER: 698 if (val > rps->efficient_freq + 1 && 699 val > rps->cur_freq) 700 new_power = BETWEEN; 701 break; 702 703 case BETWEEN: 704 if (val <= rps->efficient_freq && 705 val < rps->cur_freq) 706 new_power = LOW_POWER; 707 else if (val >= rps->rp0_freq && 708 val > rps->cur_freq) 709 new_power = HIGH_POWER; 710 break; 711 712 case HIGH_POWER: 713 if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 && 714 val < rps->cur_freq) 715 new_power = BETWEEN; 716 break; 717 } 718 /* Max/min bins are special */ 719 if (val <= rps->min_freq_softlimit) 720 new_power = LOW_POWER; 721 if (val >= rps->max_freq_softlimit) 722 new_power = HIGH_POWER; 723 724 mutex_lock(&rps->power.mutex); 725 if (rps->power.interactive) 726 new_power = HIGH_POWER; 727 rps_set_power(rps, new_power); 728 mutex_unlock(&rps->power.mutex); 729 } 730 731 void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive) 732 { 733 GT_TRACE(rps_to_gt(rps), "mark interactive: %s\n", yesno(interactive)); 734 735 mutex_lock(&rps->power.mutex); 736 if (interactive) { 737 if (!rps->power.interactive++ && intel_rps_is_active(rps)) 738 rps_set_power(rps, HIGH_POWER); 739 } else { 740 GEM_BUG_ON(!rps->power.interactive); 741 rps->power.interactive--; 742 } 743 mutex_unlock(&rps->power.mutex); 744 } 745 746 static int gen6_rps_set(struct intel_rps *rps, u8 val) 747 { 748 struct intel_uncore *uncore = rps_to_uncore(rps); 749 struct drm_i915_private *i915 = rps_to_i915(rps); 750 u32 swreq; 751 752 if (INTEL_GEN(i915) >= 9) 753 swreq = GEN9_FREQUENCY(val); 754 else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) 755 swreq = HSW_FREQUENCY(val); 756 else 757 swreq = (GEN6_FREQUENCY(val) | 758 GEN6_OFFSET(0) | 759 GEN6_AGGRESSIVE_TURBO); 760 set(uncore, GEN6_RPNSWREQ, swreq); 761 762 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d, swreq:%x\n", 763 val, intel_gpu_freq(rps, val), swreq); 764 765 return 0; 766 } 767 768 static int vlv_rps_set(struct intel_rps *rps, u8 val) 769 { 770 struct drm_i915_private *i915 = rps_to_i915(rps); 771 int err; 772 773 vlv_punit_get(i915); 774 err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val); 775 vlv_punit_put(i915); 776 777 GT_TRACE(rps_to_gt(rps), "set val:%x, freq:%d\n", 778 val, intel_gpu_freq(rps, val)); 779 780 return err; 781 } 782 783 static int rps_set(struct intel_rps *rps, u8 val, bool update) 784 { 785 struct drm_i915_private *i915 = rps_to_i915(rps); 786 int err; 787 788 if (INTEL_GEN(i915) < 6) 789 return 0; 790 791 if (val == rps->last_freq) 792 return 0; 793 794 if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) 795 err = vlv_rps_set(rps, val); 796 else 797 err = gen6_rps_set(rps, val); 798 if (err) 799 return err; 800 801 if (update) 802 gen6_rps_set_thresholds(rps, val); 803 rps->last_freq = val; 804 805 return 0; 806 } 807 808 void intel_rps_unpark(struct intel_rps *rps) 809 { 810 if (!intel_rps_is_enabled(rps)) 811 return; 812 813 GT_TRACE(rps_to_gt(rps), "unpark:%x\n", rps->cur_freq); 814 815 /* 816 * Use the user's desired frequency as a guide, but for better 817 * performance, jump directly to RPe as 
our starting frequency. 818 */ 819 mutex_lock(&rps->lock); 820 821 intel_rps_set_active(rps); 822 intel_rps_set(rps, 823 clamp(rps->cur_freq, 824 rps->min_freq_softlimit, 825 rps->max_freq_softlimit)); 826 827 mutex_unlock(&rps->lock); 828 829 rps->pm_iir = 0; 830 if (intel_rps_has_interrupts(rps)) 831 rps_enable_interrupts(rps); 832 if (intel_rps_uses_timer(rps)) 833 rps_start_timer(rps); 834 835 if (IS_GEN(rps_to_i915(rps), 5)) 836 gen5_rps_update(rps); 837 } 838 839 void intel_rps_park(struct intel_rps *rps) 840 { 841 int adj; 842 843 if (!intel_rps_clear_active(rps)) 844 return; 845 846 if (intel_rps_uses_timer(rps)) 847 rps_stop_timer(rps); 848 if (intel_rps_has_interrupts(rps)) 849 rps_disable_interrupts(rps); 850 851 if (rps->last_freq <= rps->idle_freq) 852 return; 853 854 /* 855 * The punit delays the write of the frequency and voltage until it 856 * determines the GPU is awake. During normal usage we don't want to 857 * waste power changing the frequency if the GPU is sleeping (rc6). 858 * However, the GPU and driver is now idle and we do not want to delay 859 * switching to minimum voltage (reducing power whilst idle) as we do 860 * not expect to be woken in the near future and so must flush the 861 * change by waking the device. 862 * 863 * We choose to take the media powerwell (either would do to trick the 864 * punit into committing the voltage change) as that takes a lot less 865 * power than the render powerwell. 866 */ 867 intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA); 868 rps_set(rps, rps->idle_freq, false); 869 intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA); 870 871 /* 872 * Since we will try and restart from the previously requested 873 * frequency on unparking, treat this idle point as a downclock 874 * interrupt and reduce the frequency for resume. If we park/unpark 875 * more frequently than the rps worker can run, we will not respond 876 * to any EI and never see a change in frequency. 877 * 878 * (Note we accommodate Cherryview's limitation of only using an 879 * even bin by applying it to all.) 
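	 *
	 * (Illustratively: the first idle point steps down by two bins; if we
	 * park again without an intervening up event, the step doubles
	 * (-2, -4, -8, ...) until cur_freq falls to efficient_freq, at which
	 * point last_adj is reset below.)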
880 */ 881 adj = rps->last_adj; 882 if (adj < 0) 883 adj *= 2; 884 else /* CHV needs even encode values */ 885 adj = -2; 886 rps->last_adj = adj; 887 rps->cur_freq = max_t(int, rps->cur_freq + adj, rps->min_freq); 888 if (rps->cur_freq < rps->efficient_freq) { 889 rps->cur_freq = rps->efficient_freq; 890 rps->last_adj = 0; 891 } 892 893 GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq); 894 } 895 896 void intel_rps_boost(struct i915_request *rq) 897 { 898 struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; 899 unsigned long flags; 900 901 if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) 902 return; 903 904 /* Serializes with i915_request_retire() */ 905 spin_lock_irqsave(&rq->lock, flags); 906 if (!i915_request_has_waitboost(rq) && 907 !dma_fence_is_signaled_locked(&rq->fence)) { 908 set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); 909 910 GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", 911 rq->fence.context, rq->fence.seqno); 912 913 if (!atomic_fetch_inc(&rps->num_waiters) && 914 READ_ONCE(rps->cur_freq) < rps->boost_freq) 915 schedule_work(&rps->work); 916 917 atomic_inc(&rps->boosts); 918 } 919 spin_unlock_irqrestore(&rq->lock, flags); 920 } 921 922 int intel_rps_set(struct intel_rps *rps, u8 val) 923 { 924 int err; 925 926 lockdep_assert_held(&rps->lock); 927 GEM_BUG_ON(val > rps->max_freq); 928 GEM_BUG_ON(val < rps->min_freq); 929 930 if (intel_rps_is_active(rps)) { 931 err = rps_set(rps, val, true); 932 if (err) 933 return err; 934 935 /* 936 * Make sure we continue to get interrupts 937 * until we hit the minimum or maximum frequencies. 938 */ 939 if (intel_rps_has_interrupts(rps)) { 940 struct intel_uncore *uncore = rps_to_uncore(rps); 941 942 set(uncore, 943 GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val)); 944 945 set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val)); 946 } 947 } 948 949 rps->cur_freq = val; 950 return 0; 951 } 952 953 static void gen6_rps_init(struct intel_rps *rps) 954 { 955 struct drm_i915_private *i915 = rps_to_i915(rps); 956 struct intel_uncore *uncore = rps_to_uncore(rps); 957 958 /* All of these values are in units of 50MHz */ 959 960 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 961 if (IS_GEN9_LP(i915)) { 962 u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); 963 964 rps->rp0_freq = (rp_state_cap >> 16) & 0xff; 965 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 966 rps->min_freq = (rp_state_cap >> 0) & 0xff; 967 } else { 968 u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); 969 970 rps->rp0_freq = (rp_state_cap >> 0) & 0xff; 971 rps->rp1_freq = (rp_state_cap >> 8) & 0xff; 972 rps->min_freq = (rp_state_cap >> 16) & 0xff; 973 } 974 975 /* hw_max = RP0 until we check for overclocking */ 976 rps->max_freq = rps->rp0_freq; 977 978 rps->efficient_freq = rps->rp1_freq; 979 if (IS_HASWELL(i915) || IS_BROADWELL(i915) || 980 IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { 981 u32 ddcc_status = 0; 982 983 if (sandybridge_pcode_read(i915, 984 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 985 &ddcc_status, NULL) == 0) 986 rps->efficient_freq = 987 clamp_t(u8, 988 (ddcc_status >> 8) & 0xff, 989 rps->min_freq, 990 rps->max_freq); 991 } 992 993 if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) { 994 /* Store the frequency values in 16.66 MHZ units, which is 995 * the natural hardware unit for SKL 996 */ 997 rps->rp0_freq *= GEN9_FREQ_SCALER; 998 rps->rp1_freq *= GEN9_FREQ_SCALER; 999 rps->min_freq *= GEN9_FREQ_SCALER; 1000 rps->max_freq *= GEN9_FREQ_SCALER; 1001 rps->efficient_freq *= GEN9_FREQ_SCALER; 1002 } 1003 } 1004 1005 
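/*
 * (Unit note for gen6_rps_init() above: pre-gen9 RP values are in 50 MHz
 * bins (GT_FREQUENCY_MULTIPLIER); after the GEN9_FREQ_SCALER scaling they
 * are in 16.66 MHz bins, and intel_gpu_freq()/intel_freq_opcode() convert
 * between bins and MHz accordingly. E.g. a hypothetical RP0 fuse value of 36
 * becomes 108 bins, i.e. ~1800 MHz, assuming the usual multiplier of 50 and
 * scaler of 3.)
 */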
static bool rps_reset(struct intel_rps *rps) 1006 { 1007 struct drm_i915_private *i915 = rps_to_i915(rps); 1008 1009 /* force a reset */ 1010 rps->power.mode = -1; 1011 rps->last_freq = -1; 1012 1013 if (rps_set(rps, rps->min_freq, true)) { 1014 drm_err(&i915->drm, "Failed to reset RPS to initial values\n"); 1015 return false; 1016 } 1017 1018 rps->cur_freq = rps->min_freq; 1019 return true; 1020 } 1021 1022 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 1023 static bool gen9_rps_enable(struct intel_rps *rps) 1024 { 1025 struct intel_gt *gt = rps_to_gt(rps); 1026 struct intel_uncore *uncore = gt->uncore; 1027 1028 /* Program defaults and thresholds for RPS */ 1029 if (IS_GEN(gt->i915, 9)) 1030 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1031 GEN9_FREQUENCY(rps->rp1_freq)); 1032 1033 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa); 1034 1035 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1036 1037 return rps_reset(rps); 1038 } 1039 1040 static bool gen8_rps_enable(struct intel_rps *rps) 1041 { 1042 struct intel_uncore *uncore = rps_to_uncore(rps); 1043 1044 intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ, 1045 HSW_FREQUENCY(rps->rp1_freq)); 1046 1047 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1048 1049 rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD; 1050 1051 return rps_reset(rps); 1052 } 1053 1054 static bool gen6_rps_enable(struct intel_rps *rps) 1055 { 1056 struct intel_uncore *uncore = rps_to_uncore(rps); 1057 1058 /* Power down if completely idle for over 50ms */ 1059 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); 1060 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1061 1062 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1063 GEN6_PM_RP_DOWN_THRESHOLD | 1064 GEN6_PM_RP_DOWN_TIMEOUT); 1065 1066 return rps_reset(rps); 1067 } 1068 1069 static int chv_rps_max_freq(struct intel_rps *rps) 1070 { 1071 struct drm_i915_private *i915 = rps_to_i915(rps); 1072 struct intel_gt *gt = rps_to_gt(rps); 1073 u32 val; 1074 1075 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1076 1077 switch (gt->info.sseu.eu_total) { 1078 case 8: 1079 /* (2 * 4) config */ 1080 val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT; 1081 break; 1082 case 12: 1083 /* (2 * 6) config */ 1084 val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT; 1085 break; 1086 case 16: 1087 /* (2 * 8) config */ 1088 default: 1089 /* Setting (2 * 8) Min RP0 for any other combination */ 1090 val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT; 1091 break; 1092 } 1093 1094 return val & FB_GFX_FREQ_FUSE_MASK; 1095 } 1096 1097 static int chv_rps_rpe_freq(struct intel_rps *rps) 1098 { 1099 struct drm_i915_private *i915 = rps_to_i915(rps); 1100 u32 val; 1101 1102 val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG); 1103 val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT; 1104 1105 return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 1106 } 1107 1108 static int chv_rps_guar_freq(struct intel_rps *rps) 1109 { 1110 struct drm_i915_private *i915 = rps_to_i915(rps); 1111 u32 val; 1112 1113 val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE); 1114 1115 return val & FB_GFX_FREQ_FUSE_MASK; 1116 } 1117 1118 static u32 chv_rps_min_freq(struct intel_rps *rps) 1119 { 1120 struct drm_i915_private *i915 = rps_to_i915(rps); 1121 u32 val; 1122 1123 val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE); 1124 val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT; 1125 1126 return val & FB_GFX_FREQ_FUSE_MASK; 1127 } 1128 1129 static bool chv_rps_enable(struct intel_rps *rps) 1130 { 1131 struct 
intel_uncore *uncore = rps_to_uncore(rps); 1132 struct drm_i915_private *i915 = rps_to_i915(rps); 1133 u32 val; 1134 1135 /* 1: Program defaults and thresholds for RPS*/ 1136 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1137 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1138 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1139 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1140 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1141 1142 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1143 1144 /* 2: Enable RPS */ 1145 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1146 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1147 GEN6_RP_MEDIA_IS_GFX | 1148 GEN6_RP_ENABLE | 1149 GEN6_RP_UP_BUSY_AVG | 1150 GEN6_RP_DOWN_IDLE_AVG); 1151 1152 rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | 1153 GEN6_PM_RP_DOWN_THRESHOLD | 1154 GEN6_PM_RP_DOWN_TIMEOUT); 1155 1156 /* Setting Fixed Bias */ 1157 vlv_punit_get(i915); 1158 1159 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50; 1160 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1161 1162 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1163 1164 vlv_punit_put(i915); 1165 1166 /* RPS code assumes GPLL is used */ 1167 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1168 "GPLL not enabled\n"); 1169 1170 drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 1171 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1172 1173 return rps_reset(rps); 1174 } 1175 1176 static int vlv_rps_guar_freq(struct intel_rps *rps) 1177 { 1178 struct drm_i915_private *i915 = rps_to_i915(rps); 1179 u32 val, rp1; 1180 1181 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1182 1183 rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK; 1184 rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 1185 1186 return rp1; 1187 } 1188 1189 static int vlv_rps_max_freq(struct intel_rps *rps) 1190 { 1191 struct drm_i915_private *i915 = rps_to_i915(rps); 1192 u32 val, rp0; 1193 1194 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE); 1195 1196 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 1197 /* Clamp to max */ 1198 rp0 = min_t(u32, rp0, 0xea); 1199 1200 return rp0; 1201 } 1202 1203 static int vlv_rps_rpe_freq(struct intel_rps *rps) 1204 { 1205 struct drm_i915_private *i915 = rps_to_i915(rps); 1206 u32 val, rpe; 1207 1208 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 1209 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 1210 val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 1211 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 1212 1213 return rpe; 1214 } 1215 1216 static int vlv_rps_min_freq(struct intel_rps *rps) 1217 { 1218 struct drm_i915_private *i915 = rps_to_i915(rps); 1219 u32 val; 1220 1221 val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff; 1222 /* 1223 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 1224 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 1225 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 1226 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 1227 * to make sure it matches what Punit accepts. 
1228 */ 1229 return max_t(u32, val, 0xc0); 1230 } 1231 1232 static bool vlv_rps_enable(struct intel_rps *rps) 1233 { 1234 struct intel_uncore *uncore = rps_to_uncore(rps); 1235 struct drm_i915_private *i915 = rps_to_i915(rps); 1236 u32 val; 1237 1238 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000); 1239 intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400); 1240 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000); 1241 intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000); 1242 intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000); 1243 1244 intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); 1245 1246 intel_uncore_write_fw(uncore, GEN6_RP_CONTROL, 1247 GEN6_RP_MEDIA_TURBO | 1248 GEN6_RP_MEDIA_HW_NORMAL_MODE | 1249 GEN6_RP_MEDIA_IS_GFX | 1250 GEN6_RP_ENABLE | 1251 GEN6_RP_UP_BUSY_AVG | 1252 GEN6_RP_DOWN_IDLE_CONT); 1253 1254 /* WaGsvRC0ResidencyMethod:vlv */ 1255 rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED; 1256 1257 vlv_punit_get(i915); 1258 1259 /* Setting Fixed Bias */ 1260 val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875; 1261 vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val); 1262 1263 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1264 1265 vlv_punit_put(i915); 1266 1267 /* RPS code assumes GPLL is used */ 1268 drm_WARN_ONCE(&i915->drm, (val & GPLLENABLE) == 0, 1269 "GPLL not enabled\n"); 1270 1271 drm_dbg(&i915->drm, "GPLL enabled? %s\n", yesno(val & GPLLENABLE)); 1272 drm_dbg(&i915->drm, "GPU status: 0x%08x\n", val); 1273 1274 return rps_reset(rps); 1275 } 1276 1277 static unsigned long __ips_gfx_val(struct intel_ips *ips) 1278 { 1279 struct intel_rps *rps = container_of(ips, typeof(*rps), ips); 1280 struct intel_uncore *uncore = rps_to_uncore(rps); 1281 unsigned long t, corr, state1, corr2, state2; 1282 u32 pxvid, ext_v; 1283 1284 lockdep_assert_held(&mchdev_lock); 1285 1286 pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq)); 1287 pxvid = (pxvid >> 24) & 0x7f; 1288 ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid); 1289 1290 state1 = ext_v; 1291 1292 /* Revel in the empirically derived constants */ 1293 1294 /* Correction factor in 1/100000 units */ 1295 t = ips_mch_val(uncore); 1296 if (t > 80) 1297 corr = t * 2349 + 135940; 1298 else if (t >= 50) 1299 corr = t * 964 + 29317; 1300 else /* < 50 */ 1301 corr = t * 301 + 1004; 1302 1303 corr = corr * 150142 * state1 / 10000 - 78642; 1304 corr /= 100000; 1305 corr2 = corr * ips->corr; 1306 1307 state2 = corr2 * state1 / 10000; 1308 state2 /= 100; /* convert to mW */ 1309 1310 __gen5_ips_update(ips); 1311 1312 return ips->gfx_power + state2; 1313 } 1314 1315 static bool has_busy_stats(struct intel_rps *rps) 1316 { 1317 struct intel_engine_cs *engine; 1318 enum intel_engine_id id; 1319 1320 for_each_engine(engine, rps_to_gt(rps), id) { 1321 if (!intel_engine_supports_stats(engine)) 1322 return false; 1323 } 1324 1325 return true; 1326 } 1327 1328 void intel_rps_enable(struct intel_rps *rps) 1329 { 1330 struct drm_i915_private *i915 = rps_to_i915(rps); 1331 struct intel_uncore *uncore = rps_to_uncore(rps); 1332 bool enabled = false; 1333 1334 if (!HAS_RPS(i915)) 1335 return; 1336 1337 intel_gt_check_clock_frequency(rps_to_gt(rps)); 1338 1339 intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); 1340 if (rps->max_freq <= rps->min_freq) 1341 /* leave disabled, no room for dynamic reclocking */; 1342 else if (IS_CHERRYVIEW(i915)) 1343 enabled = chv_rps_enable(rps); 1344 else if (IS_VALLEYVIEW(i915)) 1345 enabled = vlv_rps_enable(rps); 1346 else if (INTEL_GEN(i915) >= 9) 1347 enabled 
= gen9_rps_enable(rps); 1348 else if (INTEL_GEN(i915) >= 8) 1349 enabled = gen8_rps_enable(rps); 1350 else if (INTEL_GEN(i915) >= 6) 1351 enabled = gen6_rps_enable(rps); 1352 else if (IS_IRONLAKE_M(i915)) 1353 enabled = gen5_rps_enable(rps); 1354 else 1355 MISSING_CASE(INTEL_GEN(i915)); 1356 intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); 1357 if (!enabled) 1358 return; 1359 1360 GT_TRACE(rps_to_gt(rps), 1361 "min:%x, max:%x, freq:[%d, %d]\n", 1362 rps->min_freq, rps->max_freq, 1363 intel_gpu_freq(rps, rps->min_freq), 1364 intel_gpu_freq(rps, rps->max_freq)); 1365 1366 GEM_BUG_ON(rps->max_freq < rps->min_freq); 1367 GEM_BUG_ON(rps->idle_freq > rps->max_freq); 1368 1369 GEM_BUG_ON(rps->efficient_freq < rps->min_freq); 1370 GEM_BUG_ON(rps->efficient_freq > rps->max_freq); 1371 1372 if (has_busy_stats(rps)) 1373 intel_rps_set_timer(rps); 1374 else if (INTEL_GEN(i915) >= 6) 1375 intel_rps_set_interrupts(rps); 1376 else 1377 /* Ironlake currently uses intel_ips.ko */ {} 1378 1379 intel_rps_set_enabled(rps); 1380 } 1381 1382 static void gen6_rps_disable(struct intel_rps *rps) 1383 { 1384 set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0); 1385 } 1386 1387 void intel_rps_disable(struct intel_rps *rps) 1388 { 1389 struct drm_i915_private *i915 = rps_to_i915(rps); 1390 1391 intel_rps_clear_enabled(rps); 1392 intel_rps_clear_interrupts(rps); 1393 intel_rps_clear_timer(rps); 1394 1395 if (INTEL_GEN(i915) >= 6) 1396 gen6_rps_disable(rps); 1397 else if (IS_IRONLAKE_M(i915)) 1398 gen5_rps_disable(rps); 1399 } 1400 1401 static int byt_gpu_freq(struct intel_rps *rps, int val) 1402 { 1403 /* 1404 * N = val - 0xb7 1405 * Slow = Fast = GPLL ref * N 1406 */ 1407 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000); 1408 } 1409 1410 static int byt_freq_opcode(struct intel_rps *rps, int val) 1411 { 1412 return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7; 1413 } 1414 1415 static int chv_gpu_freq(struct intel_rps *rps, int val) 1416 { 1417 /* 1418 * N = val / 2 1419 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 1420 */ 1421 return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000); 1422 } 1423 1424 static int chv_freq_opcode(struct intel_rps *rps, int val) 1425 { 1426 /* CHV needs even values */ 1427 return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2; 1428 } 1429 1430 int intel_gpu_freq(struct intel_rps *rps, int val) 1431 { 1432 struct drm_i915_private *i915 = rps_to_i915(rps); 1433 1434 if (INTEL_GEN(i915) >= 9) 1435 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 1436 GEN9_FREQ_SCALER); 1437 else if (IS_CHERRYVIEW(i915)) 1438 return chv_gpu_freq(rps, val); 1439 else if (IS_VALLEYVIEW(i915)) 1440 return byt_gpu_freq(rps, val); 1441 else 1442 return val * GT_FREQUENCY_MULTIPLIER; 1443 } 1444 1445 int intel_freq_opcode(struct intel_rps *rps, int val) 1446 { 1447 struct drm_i915_private *i915 = rps_to_i915(rps); 1448 1449 if (INTEL_GEN(i915) >= 9) 1450 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 1451 GT_FREQUENCY_MULTIPLIER); 1452 else if (IS_CHERRYVIEW(i915)) 1453 return chv_freq_opcode(rps, val); 1454 else if (IS_VALLEYVIEW(i915)) 1455 return byt_freq_opcode(rps, val); 1456 else 1457 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 1458 } 1459 1460 static void vlv_init_gpll_ref_freq(struct intel_rps *rps) 1461 { 1462 struct drm_i915_private *i915 = rps_to_i915(rps); 1463 1464 rps->gpll_ref_freq = 1465 vlv_get_cck_clock(i915, "GPLL ref", 1466 CCK_GPLL_CLOCK_CONTROL, 1467 i915->czclk_freq); 1468 1469 drm_dbg(&i915->drm, "GPLL reference freq: %d 
kHz\n", 1470 rps->gpll_ref_freq); 1471 } 1472 1473 static void vlv_rps_init(struct intel_rps *rps) 1474 { 1475 struct drm_i915_private *i915 = rps_to_i915(rps); 1476 u32 val; 1477 1478 vlv_iosf_sb_get(i915, 1479 BIT(VLV_IOSF_SB_PUNIT) | 1480 BIT(VLV_IOSF_SB_NC) | 1481 BIT(VLV_IOSF_SB_CCK)); 1482 1483 vlv_init_gpll_ref_freq(rps); 1484 1485 val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS); 1486 switch ((val >> 6) & 3) { 1487 case 0: 1488 case 1: 1489 i915->mem_freq = 800; 1490 break; 1491 case 2: 1492 i915->mem_freq = 1066; 1493 break; 1494 case 3: 1495 i915->mem_freq = 1333; 1496 break; 1497 } 1498 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1499 1500 rps->max_freq = vlv_rps_max_freq(rps); 1501 rps->rp0_freq = rps->max_freq; 1502 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1503 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1504 1505 rps->efficient_freq = vlv_rps_rpe_freq(rps); 1506 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1507 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1508 1509 rps->rp1_freq = vlv_rps_guar_freq(rps); 1510 drm_dbg(&i915->drm, "RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 1511 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1512 1513 rps->min_freq = vlv_rps_min_freq(rps); 1514 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1515 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1516 1517 vlv_iosf_sb_put(i915, 1518 BIT(VLV_IOSF_SB_PUNIT) | 1519 BIT(VLV_IOSF_SB_NC) | 1520 BIT(VLV_IOSF_SB_CCK)); 1521 } 1522 1523 static void chv_rps_init(struct intel_rps *rps) 1524 { 1525 struct drm_i915_private *i915 = rps_to_i915(rps); 1526 u32 val; 1527 1528 vlv_iosf_sb_get(i915, 1529 BIT(VLV_IOSF_SB_PUNIT) | 1530 BIT(VLV_IOSF_SB_NC) | 1531 BIT(VLV_IOSF_SB_CCK)); 1532 1533 vlv_init_gpll_ref_freq(rps); 1534 1535 val = vlv_cck_read(i915, CCK_FUSE_REG); 1536 1537 switch ((val >> 2) & 0x7) { 1538 case 3: 1539 i915->mem_freq = 2000; 1540 break; 1541 default: 1542 i915->mem_freq = 1600; 1543 break; 1544 } 1545 drm_dbg(&i915->drm, "DDR speed: %d MHz\n", i915->mem_freq); 1546 1547 rps->max_freq = chv_rps_max_freq(rps); 1548 rps->rp0_freq = rps->max_freq; 1549 drm_dbg(&i915->drm, "max GPU freq: %d MHz (%u)\n", 1550 intel_gpu_freq(rps, rps->max_freq), rps->max_freq); 1551 1552 rps->efficient_freq = chv_rps_rpe_freq(rps); 1553 drm_dbg(&i915->drm, "RPe GPU freq: %d MHz (%u)\n", 1554 intel_gpu_freq(rps, rps->efficient_freq), rps->efficient_freq); 1555 1556 rps->rp1_freq = chv_rps_guar_freq(rps); 1557 drm_dbg(&i915->drm, "RP1(Guar) GPU freq: %d MHz (%u)\n", 1558 intel_gpu_freq(rps, rps->rp1_freq), rps->rp1_freq); 1559 1560 rps->min_freq = chv_rps_min_freq(rps); 1561 drm_dbg(&i915->drm, "min GPU freq: %d MHz (%u)\n", 1562 intel_gpu_freq(rps, rps->min_freq), rps->min_freq); 1563 1564 vlv_iosf_sb_put(i915, 1565 BIT(VLV_IOSF_SB_PUNIT) | 1566 BIT(VLV_IOSF_SB_NC) | 1567 BIT(VLV_IOSF_SB_CCK)); 1568 1569 drm_WARN_ONCE(&i915->drm, (rps->max_freq | rps->efficient_freq | 1570 rps->rp1_freq | rps->min_freq) & 1, 1571 "Odd GPU freq values\n"); 1572 } 1573 1574 static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei) 1575 { 1576 ei->ktime = ktime_get_raw(); 1577 ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT); 1578 ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT); 1579 } 1580 1581 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir) 1582 { 1583 struct intel_uncore *uncore = rps_to_uncore(rps); 1584 const struct intel_rps_ei *prev = &rps->ei; 1585 struct intel_rps_ei now; 1586 u32 events = 0; 1587 1588 
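	/*
	 * (Manual C0 residency evaluation for the WaGsvRC0ResidencyMethod:vlv
	 * path: on each UP_EI_EXPIRED interrupt, the render/media C0 counter
	 * deltas below are compared against the up/down thresholds to decide
	 * whether to synthesize an up or down event for rps_work().)
	 */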
	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
		return 0;

	vlv_c0_read(uncore, &now);

#ifdef __linux__
	if (prev->ktime) {
#else
	if (ktime_to_ns(prev->ktime)) {
#endif
		u64 time, c0;
		u32 render, media;

		time = ktime_us_delta(now.ktime, prev->ktime);

		time *= rps_to_i915(rps)->czclk_freq;

		/* Workload can be split between render + media,
		 * e.g. SwapBuffers being blitted in X after being rendered in
		 * mesa. To account for this we need to combine both engines
		 * into our activity counter.
		 */
		render = now.render_c0 - prev->render_c0;
		media = now.media_c0 - prev->media_c0;
		c0 = max(render, media);
		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */

		if (c0 > time * rps->power.up_threshold)
			events = GEN6_PM_RP_UP_THRESHOLD;
		else if (c0 < time * rps->power.down_threshold)
			events = GEN6_PM_RP_DOWN_THRESHOLD;
	}

	rps->ei = now;
	return events;
}

static void rps_work(struct work_struct *work)
{
	struct intel_rps *rps = container_of(work, typeof(*rps), work);
	struct intel_gt *gt = rps_to_gt(rps);
	struct drm_i915_private *i915 = rps_to_i915(rps);
	bool client_boost = false;
	int new_freq, adj, min, max;
	u32 pm_iir = 0;

	spin_lock_irq(&gt->irq_lock);
	pm_iir = fetch_and_zero(&rps->pm_iir) & rps->pm_events;
	client_boost = atomic_read(&rps->num_waiters);
	spin_unlock_irq(&gt->irq_lock);

	/* Make sure we didn't queue anything we're not going to process. */
	if (!pm_iir && !client_boost)
		goto out;

	mutex_lock(&rps->lock);
	if (!intel_rps_is_active(rps)) {
		mutex_unlock(&rps->lock);
		return;
	}

	pm_iir |= vlv_wa_c0_ei(rps, pm_iir);

	adj = rps->last_adj;
	new_freq = rps->cur_freq;
	min = rps->min_freq_softlimit;
	max = rps->max_freq_softlimit;
	if (client_boost)
		max = rps->max_freq;

	GT_TRACE(gt,
		 "pm_iir:%x, client_boost:%s, last:%d, cur:%x, min:%x, max:%x\n",
		 pm_iir, yesno(client_boost),
		 adj, new_freq, min, max);
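
	/*
	 * (Roughly: a client boost jumps straight to boost_freq; consecutive
	 * up events double the upward step (+1, +2, +4, ... bins, even steps
	 * on CHV); a down timeout drops to the efficient or minimum
	 * frequency; and consecutive down events likewise double the
	 * downward step.)
	 */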
	if (client_boost && new_freq < rps->boost_freq) {
		new_freq = rps->boost_freq;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
		if (adj > 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;

		if (new_freq >= rps->max_freq_softlimit)
			adj = 0;
	} else if (client_boost) {
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
		if (rps->cur_freq > rps->efficient_freq)
			new_freq = rps->efficient_freq;
		else if (rps->cur_freq > rps->min_freq_softlimit)
			new_freq = rps->min_freq_softlimit;
		adj = 0;
	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
		if (adj < 0)
			adj *= 2;
		else /* CHV needs even encode values */
			adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;

		if (new_freq <= rps->min_freq_softlimit)
			adj = 0;
	} else { /* unknown event */
		adj = 0;
	}

	/*
	 * sysfs frequency limits may have snuck in while
	 * servicing the interrupt
	 */
	new_freq += adj;
	new_freq = clamp_t(int, new_freq, min, max);

	if (intel_rps_set(rps, new_freq)) {
		drm_dbg(&i915->drm, "Failed to set new GPU frequency\n");
		adj = 0;
	}
	rps->last_adj = adj;

	mutex_unlock(&rps->lock);

out:
	spin_lock_irq(&gt->irq_lock);
	gen6_gt_pm_unmask_irq(gt, rps->pm_events);
	spin_unlock_irq(&gt->irq_lock);
}

void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);
	const u32 events = rps->pm_events & pm_iir;

	lockdep_assert_held(&gt->irq_lock);

	if (unlikely(!events))
		return;

	GT_TRACE(gt, "irq events:%x\n", events);

	gen6_gt_pm_mask_irq(gt, events);

	rps->pm_iir |= events;
	schedule_work(&rps->work);
}

void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
{
	struct intel_gt *gt = rps_to_gt(rps);
	u32 events;

	events = pm_iir & rps->pm_events;
	if (events) {
		spin_lock(&gt->irq_lock);

		GT_TRACE(gt, "irq events:%x\n", events);

		gen6_gt_pm_mask_irq(gt, events);
		rps->pm_iir |= events;

		schedule_work(&rps->work);
		spin_unlock(&gt->irq_lock);
	}

	if (INTEL_GEN(gt->i915) >= 8)
		return;

	if (pm_iir & PM_VEBOX_USER_INTERRUPT)
		intel_engine_signal_breadcrumbs(gt->engine[VECS0]);

	if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
		DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
}

void gen5_rps_irq_handler(struct intel_rps *rps)
{
	struct intel_uncore *uncore = rps_to_uncore(rps);
	u32 busy_up, busy_down, max_avg, min_avg;
	u8 new_freq;

	spin_lock(&mchdev_lock);

	intel_uncore_write16(uncore,
			     MEMINTRSTS,
			     intel_uncore_read(uncore, MEMINTRSTS));

	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
	min_avg = intel_uncore_read(uncore, RCBMINAVG);

	/* Handle RCS change request from hw */
	new_freq = rps->cur_freq;
	if (busy_up > max_avg)
		new_freq++;
	else if (busy_down < min_avg)
		new_freq--;
	new_freq = clamp(new_freq,
			 rps->min_freq_softlimit,
			 rps->max_freq_softlimit);

	if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
		rps->cur_freq = new_freq;

	spin_unlock(&mchdev_lock);
}

void intel_rps_init_early(struct intel_rps *rps)
{
	rw_init(&rps->lock, "rpslk");
	rw_init(&rps->power.mutex, "rpspwr");

	INIT_WORK(&rps->work, rps_work);
#ifdef __linux__
	timer_setup(&rps->timer, rps_timer, 0);
#else
	timeout_set(&rps->timer, rps_timer, rps);
#endif

	atomic_set(&rps->num_waiters, 0);
}
void intel_rps_init(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_CHERRYVIEW(i915))
		chv_rps_init(rps);
	else if (IS_VALLEYVIEW(i915))
		vlv_rps_init(rps);
	else if (INTEL_GEN(i915) >= 6)
		gen6_rps_init(rps);
	else if (IS_IRONLAKE_M(i915))
		gen5_rps_init(rps);

	/* Derive initial user preferences/limits from the hardware limits */
	rps->max_freq_softlimit = rps->max_freq;
	rps->min_freq_softlimit = rps->min_freq;

	/* After setting max-softlimit, find the overclock max freq */
	if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
		u32 params = 0;

		sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
				       &params, NULL);
		if (params & BIT(31)) { /* OC supported */
			drm_dbg(&i915->drm,
				"Overclocking supported, max: %dMHz, overclock: %dMHz\n",
				(rps->max_freq & 0xff) * 50,
				(params & 0xff) * 50);
			rps->max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	rps->boost_freq = rps->max_freq;
	rps->idle_freq = rps->min_freq;

	/* Start in the middle, from here we will autotune based on workload */
	rps->cur_freq = rps->efficient_freq;

	rps->pm_intrmsk_mbz = 0;

	/*
	 * SNB,IVB,HSW can hard hang, while VLV,CHV may hard hang, on a
	 * looping batchbuffer if GEN6_PM_UP_EI_EXPIRED is masked.
	 *
	 * TODO: verify if this can be reproduced on VLV,CHV.
	 */
	if (INTEL_GEN(i915) <= 7)
		rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;

	if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
		rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
}

void intel_rps_sanitize(struct intel_rps *rps)
{
	if (INTEL_GEN(rps_to_i915(rps)) >= 6)
		rps_disable_interrupts(rps);
}

u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 cagf;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
		cagf = (rpstat >> 8) & 0xff;
	else if (INTEL_GEN(i915) >= 9)
		cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
	else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
		cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
	else
		cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;

	return cagf;
}

static u32 read_cagf(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	u32 freq;

	if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
		vlv_punit_get(i915);
		freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
		vlv_punit_put(i915);
	} else {
		freq = intel_uncore_read(rps_to_uncore(rps), GEN6_RPSTAT1);
	}

	return intel_rps_get_cagf(rps, freq);
}

u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
{
	struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm;
	intel_wakeref_t wakeref;
	u32 freq = 0;

	with_intel_runtime_pm_if_in_use(rpm, wakeref)
		freq = intel_gpu_freq(rps, read_cagf(rps));

	return freq;
}

/* External interface for intel_ips.ko */

static struct drm_i915_private __rcu *ips_mchdev;

/**
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
1926 */ 1927 static void 1928 ips_ping_for_i915_load(void) 1929 { 1930 #ifdef __linux__ 1931 void (*link)(void); 1932 1933 link = symbol_get(ips_link_to_i915_driver); 1934 if (link) { 1935 link(); 1936 symbol_put(ips_link_to_i915_driver); 1937 } 1938 #endif 1939 } 1940 1941 void intel_rps_driver_register(struct intel_rps *rps) 1942 { 1943 struct intel_gt *gt = rps_to_gt(rps); 1944 1945 /* 1946 * We only register the i915 ips part with intel-ips once everything is 1947 * set up, to avoid intel-ips sneaking in and reading bogus values. 1948 */ 1949 if (IS_GEN(gt->i915, 5)) { 1950 GEM_BUG_ON(ips_mchdev); 1951 rcu_assign_pointer(ips_mchdev, gt->i915); 1952 ips_ping_for_i915_load(); 1953 } 1954 } 1955 1956 void intel_rps_driver_unregister(struct intel_rps *rps) 1957 { 1958 if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) 1959 rcu_assign_pointer(ips_mchdev, NULL); 1960 } 1961 1962 static struct drm_i915_private *mchdev_get(void) 1963 { 1964 struct drm_i915_private *i915; 1965 1966 rcu_read_lock(); 1967 i915 = rcu_dereference(ips_mchdev); 1968 if (!kref_get_unless_zero(&i915->drm.ref)) 1969 i915 = NULL; 1970 rcu_read_unlock(); 1971 1972 return i915; 1973 } 1974 1975 /** 1976 * i915_read_mch_val - return value for IPS use 1977 * 1978 * Calculate and return a value for the IPS driver to use when deciding whether 1979 * we have thermal and power headroom to increase CPU or GPU power budget. 1980 */ 1981 unsigned long i915_read_mch_val(void) 1982 { 1983 struct drm_i915_private *i915; 1984 unsigned long chipset_val = 0; 1985 unsigned long graphics_val = 0; 1986 intel_wakeref_t wakeref; 1987 1988 i915 = mchdev_get(); 1989 if (!i915) 1990 return 0; 1991 1992 with_intel_runtime_pm(&i915->runtime_pm, wakeref) { 1993 struct intel_ips *ips = &i915->gt.rps.ips; 1994 1995 spin_lock_irq(&mchdev_lock); 1996 chipset_val = __ips_chipset_val(ips); 1997 graphics_val = __ips_gfx_val(ips); 1998 spin_unlock_irq(&mchdev_lock); 1999 } 2000 2001 drm_dev_put(&i915->drm); 2002 return chipset_val + graphics_val; 2003 } 2004 EXPORT_SYMBOL_GPL(i915_read_mch_val); 2005 2006 /** 2007 * i915_gpu_raise - raise GPU frequency limit 2008 * 2009 * Raise the limit; IPS indicates we have thermal headroom. 2010 */ 2011 bool i915_gpu_raise(void) 2012 { 2013 struct drm_i915_private *i915; 2014 struct intel_rps *rps; 2015 2016 i915 = mchdev_get(); 2017 if (!i915) 2018 return false; 2019 2020 rps = &i915->gt.rps; 2021 2022 spin_lock_irq(&mchdev_lock); 2023 if (rps->max_freq_softlimit < rps->max_freq) 2024 rps->max_freq_softlimit++; 2025 spin_unlock_irq(&mchdev_lock); 2026 2027 drm_dev_put(&i915->drm); 2028 return true; 2029 } 2030 EXPORT_SYMBOL_GPL(i915_gpu_raise); 2031 2032 /** 2033 * i915_gpu_lower - lower GPU frequency limit 2034 * 2035 * IPS indicates we're close to a thermal limit, so throttle back the GPU 2036 * frequency maximum. 2037 */ 2038 bool i915_gpu_lower(void) 2039 { 2040 struct drm_i915_private *i915; 2041 struct intel_rps *rps; 2042 2043 i915 = mchdev_get(); 2044 if (!i915) 2045 return false; 2046 2047 rps = &i915->gt.rps; 2048 2049 spin_lock_irq(&mchdev_lock); 2050 if (rps->max_freq_softlimit > rps->min_freq) 2051 rps->max_freq_softlimit--; 2052 spin_unlock_irq(&mchdev_lock); 2053 2054 drm_dev_put(&i915->drm); 2055 return true; 2056 } 2057 EXPORT_SYMBOL_GPL(i915_gpu_lower); 2058 2059 /** 2060 * i915_gpu_busy - indicate GPU business to IPS 2061 * 2062 * Tell the IPS driver whether or not the GPU is busy. 
2063 */ 2064 bool i915_gpu_busy(void) 2065 { 2066 struct drm_i915_private *i915; 2067 bool ret; 2068 2069 i915 = mchdev_get(); 2070 if (!i915) 2071 return false; 2072 2073 ret = i915->gt.awake; 2074 2075 drm_dev_put(&i915->drm); 2076 return ret; 2077 } 2078 EXPORT_SYMBOL_GPL(i915_gpu_busy); 2079 2080 /** 2081 * i915_gpu_turbo_disable - disable graphics turbo 2082 * 2083 * Disable graphics turbo by resetting the max frequency and setting the 2084 * current frequency to the default. 2085 */ 2086 bool i915_gpu_turbo_disable(void) 2087 { 2088 struct drm_i915_private *i915; 2089 struct intel_rps *rps; 2090 bool ret; 2091 2092 i915 = mchdev_get(); 2093 if (!i915) 2094 return false; 2095 2096 rps = &i915->gt.rps; 2097 2098 spin_lock_irq(&mchdev_lock); 2099 rps->max_freq_softlimit = rps->min_freq; 2100 ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); 2101 spin_unlock_irq(&mchdev_lock); 2102 2103 drm_dev_put(&i915->drm); 2104 return ret; 2105 } 2106 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 2107 2108 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2109 #include "selftest_rps.c" 2110 #endif 2111