/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include <drm/drm_plane_helper.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include <linux/module.h>
#include <drm/drm_atomic_helper.h>

/**
 * DOC: RC6
 *
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage. This
 * stage is entered automatically when the GPU is idle and RC6 support is
 * enabled; as soon as a new workload arises, the GPU wakes up automatically
 * as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ from
 * each other in the latency required to enter and leave RC6, and in the
 * voltage consumed by the GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6 state,
 * and RC6pp is the deepest RC6 state. Their support by hardware varies
 * according to the GPU, BIOS, chipset and platform. RC6 is usually the
 * safest one and the one which brings the most power savings; deeper states
 * save more power, but require higher latency to switch to and wake up.
 */
#define INTEL_RC6_ENABLE		(1<<0)
#define INTEL_RC6p_ENABLE		(1<<1)
#define INTEL_RC6pp_ENABLE		(1<<2)
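
/*
 * Illustration (added for clarity, not part of the original source): these
 * flags are plain bit masks, so allowing both normal and deep RC6 would be
 * expressed as the bitwise OR (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE),
 * i.e. 0x3, while 0 would disallow RC6 entirely.
 */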

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
	I915_WRITE(CHICKEN_PAR1_1,
		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

	I915_WRITE(GEN8_CONFIG0,
		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);

	/* WaEnableChickenDCPR:skl,bxt,kbl,glk */
	I915_WRITE(GEN8_CHICKEN_DCPR_1,
		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);

	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
	/* WaFbcWakeMemOn:skl,bxt,kbl,glk */
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
		   DISP_FBC_WM_DIS |
		   DISP_FBC_MEMORY_WAKE);

	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
		   ILK_DPFC_DISABLE_DUMMY0);
}

static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/* WaDisableSDEUnitClockGating:bxt */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/*
	 * FIXME:
	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
	 */
	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);

	/*
	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);
}

static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
{
	gen9_init_clock_gating(dev_priv);

	/*
	 * WaDisablePWMClockGating:glk
	 * Backlight PWM may stop in the asserted state, causing backlight
	 * to stay fully on.
	 */
	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
		   PWM1_GATING_DIS | PWM2_GATING_DIS);

	/* WaDDIIOTimeout:glk */
	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
		u32 val = I915_READ(CHICKEN_MISC_2);
		val &= ~(GLK_CL0_PWR_DOWN |
			 GLK_CL1_PWR_DOWN |
			 GLK_CL2_PWR_DOWN);
		I915_WRITE(CHICKEN_MISC_2, val);
	}

}

static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u32 tmp;

	tmp = I915_READ(CLKCFG);

	switch (tmp & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	}

	switch (tmp & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* detect pineview DDR3 setting */
	tmp = I915_READ(CSHRDDR3CTL);
	dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
{
	u16 ddrpll, csipll;

	ddrpll = I915_READ16(DDRMPLL1);
	csipll = I915_READ16(CSIPLL0);

	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	if (dev_priv->fsb_freq == 3200) {
		dev_priv->ips.c_m = 0;
	} else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
		dev_priv->ips.c_m = 1;
	} else {
		dev_priv->ips.c_m = 2;
	}
}

static const struct cxsr_latency cxsr_latency_table[] = {
	{1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
	{1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
	{1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
	{1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
	{1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

	{1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
	{1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
	{1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
	{1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
	{1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

	{1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
	{1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
	{1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
	{1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
	{1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

	{0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
	{0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
	{0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
	{0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
	{0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

	{0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
	{0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
	{0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
	{0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
	{0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

	{0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
	{0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
	{0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
	{0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
	{0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};
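
/*
 * Annotation added for clarity (inferred from how the table is consumed
 * below, so treat it as an assumption): each row appears to map to the
 * cxsr_latency fields in the order {is_desktop, is_ddr3, fsb_freq, mem_freq,
 * display_sr, cursor_sr, display_hpll_disable, cursor_hpll_disable}.
 * intel_get_cxsr_latency() matches on the first four columns, and
 * pineview_update_wm() feeds the remaining four latencies into
 * intel_calculate_wm() as latency_ns values.
 */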

static const struct cxsr_latency *intel_get_cxsr_latency(bool is_desktop,
							 bool is_ddr3,
							 int fsb,
							 int mem)
{
	const struct cxsr_latency *latency;
	int i;

	if (fsb == 0 || mem == 0)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
		latency = &cxsr_latency_table[i];
		if (is_desktop == latency->is_desktop &&
		    is_ddr3 == latency->is_ddr3 &&
		    fsb == latency->fsb_freq && mem == latency->mem_freq)
			return latency;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}

static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
	if (enable)
		val &= ~FORCE_DDR_HIGH_FREQ;
	else
		val |= FORCE_DDR_HIGH_FREQ;
	val &= ~FORCE_DDR_LOW_FREQ;
	val |= FORCE_DDR_FREQ_REQ_ACK;
	vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val);

	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) &
		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");

	mutex_unlock(&dev_priv->rps.hw_lock);
}

static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
{
	u32 val;

	mutex_lock(&dev_priv->rps.hw_lock);

	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
	if (enable)
		val |= DSP_MAXFIFO_PM5_ENABLE;
	else
		val &= ~DSP_MAXFIFO_PM5_ENABLE;
	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);

	mutex_unlock(&dev_priv->rps.hw_lock);
}

#define FW_WM(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK)
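
/*
 * Example added for illustration: FW_WM(wm, SR) expands to
 * (((wm) << DSPFW_SR_SHIFT) & DSPFW_SR_MASK), i.e. the watermark value is
 * shifted into the SR field of the DSPFW register and masked to that field.
 * The FW_WM_VLV variant defined later in this file works the same way but
 * uses the wider *_MASK_VLV field definitions.
 */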

static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool was_enabled;
	u32 val;

	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
		I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
		POSTING_READ(FW_BLC_SELF_VLV);
	} else if (IS_G4X(dev_priv) || IS_I965GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_PINEVIEW(dev_priv)) {
		val = I915_READ(DSPFW3);
		was_enabled = val & PINEVIEW_SELF_REFRESH_EN;
		if (enable)
			val |= PINEVIEW_SELF_REFRESH_EN;
		else
			val &= ~PINEVIEW_SELF_REFRESH_EN;
		I915_WRITE(DSPFW3, val);
		POSTING_READ(DSPFW3);
	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) {
		was_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
			       _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
		I915_WRITE(FW_BLC_SELF, val);
		POSTING_READ(FW_BLC_SELF);
	} else if (IS_I915GM(dev_priv)) {
		/*
		 * FIXME can't find a bit like this for 915G, and
		 * yet it does have the related watermark in
		 * FW_BLC_SELF. What's going on?
		 */
		was_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
		val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
			       _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
		I915_WRITE(INSTPM, val);
		POSTING_READ(INSTPM);
	} else {
		return false;
	}

	trace_intel_memory_cxsr(dev_priv, was_enabled, enable);

	DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n",
		      enableddisabled(enable),
		      enableddisabled(was_enabled));

	return was_enabled;
}

bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
	bool ret;

	mutex_lock(&dev_priv->wm.wm_mutex);
	ret = _intel_set_memory_cxsr(dev_priv, enable);
	dev_priv->wm.vlv.cxsr = enable;
	mutex_unlock(&dev_priv->wm.wm_mutex);

	return ret;
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

#define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \
	((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8))

static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	enum i915_pipe pipe = crtc->pipe;
	int sprite0_start, sprite1_start;

	switch (pipe) {
		uint32_t dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 0, 0);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 8, 4);
		break;
	case PIPE_B:
		dsparb = I915_READ(DSPARB);
		dsparb2 = I915_READ(DSPARB2);
		sprite0_start = VLV_FIFO_START(dsparb, dsparb2, 16, 8);
		sprite1_start = VLV_FIFO_START(dsparb, dsparb2, 24, 12);
		break;
	case PIPE_C:
		dsparb2 = I915_READ(DSPARB2);
		dsparb3 = I915_READ(DSPARB3);
		sprite0_start = VLV_FIFO_START(dsparb3, dsparb2, 0, 16);
		sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20);
		break;
	default:
		MISSING_CASE(pipe);
		return;
	}

	fifo_state->plane[PLANE_PRIMARY] = sprite0_start;
	fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start;
	fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start;
	fifo_state->plane[PLANE_CURSOR] = 63;

	DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n",
		      pipe_name(pipe),
		      fifo_state->plane[PLANE_PRIMARY],
		      fifo_state->plane[PLANE_SPRITE0],
		      fifo_state->plane[PLANE_SPRITE1],
		      fifo_state->plane[PLANE_CURSOR]);
}

static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
{
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	if (plane)
		size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
{
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x1ff;
	if (plane)
		size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
	size >>= 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}

static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane)
{
	uint32_t dsparb = I915_READ(DSPARB);
	int size;

	size = dsparb & 0x7f;
	size >>= 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A",
		      size);

	return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
	.fifo_size = PINEVIEW_DISPLAY_FIFO,
	.max_wm = PINEVIEW_MAX_WM,
	.default_wm = PINEVIEW_DFT_HPLLOFF_WM,
	.guard_size = PINEVIEW_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
	.fifo_size = PINEVIEW_CURSOR_FIFO,
	.max_wm = PINEVIEW_CURSOR_MAX_WM,
	.default_wm = PINEVIEW_CURSOR_DFT_WM,
	.guard_size = PINEVIEW_CURSOR_GUARD_WM,
	.cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
	.fifo_size = G4X_FIFO_SIZE,
	.max_wm = G4X_MAX_WM,
	.default_wm = G4X_MAX_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
	.fifo_size = I965_CURSOR_FIFO,
	.max_wm = I965_CURSOR_MAX_WM,
	.default_wm = I965_CURSOR_DFT_WM,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
	.fifo_size = I945_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
	.fifo_size = I915_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
	.fifo_size = I855GM_FIFO_SIZE,
	.max_wm = I915_MAX_WM/2,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
	.fifo_size = I830_FIFO_SIZE,
	.max_wm = I915_MAX_WM,
	.default_wm = 1,
	.guard_size = 2,
	.cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the display FIFO
 * @cpp: bytes per pixel
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again). Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size. When it reaches the watermark level, it'll start
 * fetching FIFO-line-sized chunks from memory until the FIFO fills past the
 * watermark point. If the FIFO drains completely, a FIFO underrun will
 * occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size, int cpp,
					unsigned long latency_ns)
{
	long entries_required, wm_size;

	/*
	 * Note: we need to make sure we don't overflow for various clock &
	 * latency values.
	 * clocks go from a few thousand to several hundred thousand.
	 * latency is usually a few thousand
	 */
	entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
		1000;
	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

	wm_size = fifo_size - (entries_required + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

	/* Don't promote wm_size to unsigned... */
	if (wm_size > (long)wm->max_wm)
		wm_size = wm->max_wm;
	if (wm_size <= 0)
		wm_size = wm->default_wm;

	/*
	 * Bspec seems to indicate that the value shouldn't be lower than
	 * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
	 * Let's go for 8 which is the burst size since certain platforms
	 * already use a hardcoded 8 (which is what the spec says should be
	 * done).
	 */
	if (wm_size <= 8)
		wm_size = 8;

	return wm_size;
}
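
/*
 * Worked example (added for illustration, hypothetical numbers): with a
 * 100000 kHz pixel clock, 4 bytes per pixel, a 5000 ns latency and a 64-byte
 * cacheline, entries_required = ((100000 / 1000) * 4 * 5000) / 1000 = 2000
 * bytes, which rounds up to 32 cachelines. The watermark is then the FIFO
 * size minus those 32 entries and the guard size, clamped to the platform's
 * max_wm/default_wm limits as above.
 */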

static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
				   const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);

	/* FIXME check the 'enable' instead */
	if (!crtc_state->base.active)
		return false;

	/*
	 * Treat cursor with fb as always visible since cursor updates
	 * can happen faster than the vrefresh rate, and the current
	 * watermark code doesn't handle that correctly. Cursor updates
	 * which set/clear the fb or change the cursor size are going
	 * to get throttled by intel_legacy_cursor_update() to work
	 * around this problem with the watermark code.
	 */
	if (plane->id == PLANE_CURSOR)
		return plane_state->base.fb != NULL;
	else
		return plane_state->base.visible;
}

static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
{
	struct intel_crtc *crtc, *enabled = NULL;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		if (intel_crtc_active(crtc)) {
			if (enabled)
				return NULL;
			enabled = crtc;
		}
	}

	return enabled;
}

static void pineview_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	const struct cxsr_latency *latency;
	u32 reg;
	unsigned long wm;

	latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv),
					 dev_priv->is_ddr3,
					 dev_priv->fsb_freq,
					 dev_priv->mem_freq);
	if (!latency) {
		DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
		intel_set_memory_cxsr(dev_priv, false);
		return;
	}

	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int cpp = fb->format->cpp[0];
		int clock = adjusted_mode->crtc_clock;

		/* Display SR */
		wm = intel_calculate_wm(clock, &pineview_display_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->display_sr);
		reg = I915_READ(DSPFW1);
		reg &= ~DSPFW_SR_MASK;
		reg |= FW_WM(wm, SR);
		I915_WRITE(DSPFW1, reg);
		DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

		/* cursor SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
					pineview_display_wm.fifo_size,
					cpp, latency->cursor_sr);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_CURSOR_SR_MASK;
		reg |= FW_WM(wm, CURSOR_SR);
		I915_WRITE(DSPFW3, reg);

		/* Display HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->display_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_SR_MASK;
		reg |= FW_WM(wm, HPLL_SR);
		I915_WRITE(DSPFW3, reg);

		/* cursor HPLL off SR */
		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
					pineview_display_hplloff_wm.fifo_size,
					cpp, latency->cursor_hpll_disable);
		reg = I915_READ(DSPFW3);
		reg &= ~DSPFW_HPLL_CURSOR_MASK;
		reg |= FW_WM(wm, HPLL_CURSOR);
		I915_WRITE(DSPFW3, reg);
		DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

		intel_set_memory_cxsr(dev_priv, true);
	} else {
		intel_set_memory_cxsr(dev_priv, false);
	}
}

static bool g4x_compute_wm0(struct drm_i915_private *dev_priv,
			    int plane,
			    const struct intel_watermark_params *display,
			    int display_latency_ns,
			    const struct intel_watermark_params *cursor,
			    int cursor_latency_ns,
			    int *plane_wm,
			    int *cursor_wm)
{
	struct intel_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	const struct drm_framebuffer *fb;
	int htotal, hdisplay, clock, cpp;
	int line_time_us, line_count;
	int entries, tlb_miss;

	crtc = intel_get_crtc_for_plane(dev_priv, plane);
	if (!intel_crtc_active(crtc)) {
		*cursor_wm = cursor->guard_size;
		*plane_wm = display->guard_size;
		return false;
	}

	adjusted_mode = &crtc->config->base.adjusted_mode;
	fb = crtc->base.primary->state->fb;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = crtc->config->pipe_src_w;
	cpp = fb->format->cpp[0];

	/* Use the small buffer method to calculate plane watermark */
	entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, display->cacheline_size);
	*plane_wm = entries + display->guard_size;
	if (*plane_wm > (int)display->max_wm)
		*plane_wm = display->max_wm;

	/* Use the large buffer method to calculate cursor watermark */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
	entries = line_count * crtc->base.cursor->state->crtc_w * cpp;
	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;
	if (*cursor_wm > (int)cursor->max_wm)
		*cursor_wm = (int)cursor->max_wm;

	return true;
}

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_i915_private *dev_priv,
			   int display_wm, int cursor_wm,
			   const struct intel_watermark_params *display,
			   const struct intel_watermark_params *cursor)
{
	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
		      display_wm, cursor_wm);

	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark is too large(%d/%u), disabling\n",
			      display_wm, display->max_wm);
		return false;
	}

	if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark is too large(%d/%u), disabling\n",
			      cursor_wm, cursor->max_wm);
		return false;
	}

	if (!(display_wm || cursor_wm)) {
		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
		return false;
	}

	return true;
}
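
/*
 * Note added for illustration: the "small buffer" method used above and in
 * g4x_compute_srwm() below sizes the FIFO usage from raw bandwidth
 * (clock * cpp * latency), while the "large buffer" method sizes it from
 * whole lines (line_count * line_size). For the self-refresh display plane
 * watermark the smaller of the two estimates is used.
 */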

static bool g4x_compute_srwm(struct drm_i915_private *dev_priv,
			     int plane,
			     int latency_ns,
			     const struct intel_watermark_params *display,
			     const struct intel_watermark_params *cursor,
			     int *display_wm, int *cursor_wm)
{
	struct intel_crtc *crtc;
	const struct drm_display_mode *adjusted_mode;
	const struct drm_framebuffer *fb;
	int hdisplay, htotal, cpp, clock;
	unsigned long line_time_us;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev_priv, plane);
	adjusted_mode = &crtc->config->base.adjusted_mode;
	fb = crtc->base.primary->state->fb;
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	hdisplay = crtc->config->pipe_src_w;
	cpp = fb->format->cpp[0];

	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * cpp;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * cpp / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * cpp * crtc->base.cursor->state->crtc_w;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return g4x_check_srwm(dev_priv,
			      *display_wm, *cursor_wm,
			      display, cursor);
}

#define FW_WM_VLV(value, plane) \
	(((value) << DSPFW_ ## plane ## _SHIFT) & DSPFW_ ## plane ## _MASK_VLV)

static void vlv_write_wm_values(struct drm_i915_private *dev_priv,
				const struct vlv_wm_values *wm)
{
	enum i915_pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm);

		I915_WRITE(VLV_DDL(pipe),
			   (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) |
			   (wm->ddl[pipe].plane[PLANE_SPRITE0] << DDL_SPRITE_SHIFT(0)) |
			   (wm->ddl[pipe].plane[PLANE_PRIMARY] << DDL_PLANE_SHIFT));
	}

	/*
	 * Zero the (unused) WM1 watermarks, and also clear all the
	 * high order bits so that there are no out of bounds values
	 * present in the registers during the reprogramming.
	 */
	I915_WRITE(DSPHOWM, 0);
	I915_WRITE(DSPHOWM1, 0);
	I915_WRITE(DSPFW4, 0);
	I915_WRITE(DSPFW5, 0);
	I915_WRITE(DSPFW6, 0);

	I915_WRITE(DSPFW1,
		   FW_WM(wm->sr.plane, SR) |
		   FW_WM(wm->pipe[PIPE_B].plane[PLANE_CURSOR], CURSORB) |
		   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_PRIMARY], PLANEB) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_PRIMARY], PLANEA));
	I915_WRITE(DSPFW2,
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE1], SPRITEB) |
		   FW_WM(wm->pipe[PIPE_A].plane[PLANE_CURSOR], CURSORA) |
		   FW_WM_VLV(wm->pipe[PIPE_A].plane[PLANE_SPRITE0], SPRITEA));
	I915_WRITE(DSPFW3,
		   FW_WM(wm->sr.cursor, CURSOR_SR));

	if (IS_CHERRYVIEW(dev_priv)) {
		I915_WRITE(DSPFW7_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPFW8_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE1], SPRITEF) |
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_SPRITE0], SPRITEE));
		I915_WRITE(DSPFW9_CHV,
			   FW_WM_VLV(wm->pipe[PIPE_C].plane[PLANE_PRIMARY], PLANEC) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_CURSOR], CURSORC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE1] >> 8, SPRITEF_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_SPRITE0] >> 8, SPRITEE_HI) |
			   FW_WM(wm->pipe[PIPE_C].plane[PLANE_PRIMARY] >> 8, PLANEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	} else {
		I915_WRITE(DSPFW7,
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE1], SPRITED) |
			   FW_WM_VLV(wm->pipe[PIPE_B].plane[PLANE_SPRITE0], SPRITEC));
		I915_WRITE(DSPHOWM,
			   FW_WM(wm->sr.plane >> 9, SR_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE1] >> 8, SPRITED_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_SPRITE0] >> 8, SPRITEC_HI) |
			   FW_WM(wm->pipe[PIPE_B].plane[PLANE_PRIMARY] >> 8, PLANEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE1] >> 8, SPRITEB_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_SPRITE0] >> 8, SPRITEA_HI) |
			   FW_WM(wm->pipe[PIPE_A].plane[PLANE_PRIMARY] >> 8, PLANEA_HI));
	}

	POSTING_READ(DSPFW1);
}

#undef FW_WM_VLV

/* latency must be in 0.1us units. */
static unsigned int vlv_wm_method2(unsigned int pixel_rate,
				   unsigned int pipe_htotal,
				   unsigned int horiz_pixels,
				   unsigned int cpp,
				   unsigned int latency)
{
	unsigned int ret;

	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
	ret = (ret + 1) * horiz_pixels * cpp;
	ret = DIV_ROUND_UP(ret, 64);

	return ret;
}
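
/*
 * Worked example (added for illustration, hypothetical numbers): with
 * latency = 30 (3 us expressed in 0.1 us units), pixel_rate = 200000 kHz
 * and pipe_htotal = 2200, the first step yields 6000000 / 22000000 = 0
 * full lines of latency, so one extra line is budgeted; for a 1920 pixel
 * wide plane at 4 bytes per pixel that is 7680 bytes, or 120 64-byte FIFO
 * entries after the final DIV_ROUND_UP.
 */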

static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv)
{
	/* all latencies in usec */
	dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3;

	dev_priv->wm.max_level = VLV_WM_LEVEL_PM2;

	if (IS_CHERRYVIEW(dev_priv)) {
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM5] = 12;
		dev_priv->wm.pri_latency[VLV_WM_LEVEL_DDR_DVFS] = 33;

		dev_priv->wm.max_level = VLV_WM_LEVEL_DDR_DVFS;
	}
}

static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state,
				     const struct intel_plane_state *plane_state,
				     int level)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
	const struct drm_display_mode *adjusted_mode =
		&crtc_state->base.adjusted_mode;
	int clock, htotal, cpp, width, wm;

	if (dev_priv->wm.pri_latency[level] == 0)
		return USHRT_MAX;

	if (!plane_state->base.visible)
		return 0;

	cpp = plane_state->base.fb->format->cpp[0];
	clock = adjusted_mode->crtc_clock;
	htotal = adjusted_mode->crtc_htotal;
	width = crtc_state->pipe_src_w;
	if (WARN_ON(htotal == 0))
		htotal = 1;

	if (plane->base.type == DRM_PLANE_TYPE_CURSOR) {
		/*
		 * FIXME the formula gives values that are
		 * too big for the cursor FIFO, and hence we
		 * would never be able to use cursors. For
		 * now just hardcode the watermark.
		 */
		wm = 63;
	} else {
		wm = vlv_wm_method2(clock, htotal, width, cpp,
				    dev_priv->wm.pri_latency[level] * 10);
	}

	return min_t(int, wm, USHRT_MAX);
}

static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes)
{
	return (active_planes & (BIT(PLANE_SPRITE0) |
				 BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1);
}

static int vlv_compute_fifo(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	const struct vlv_pipe_wm *raw =
		&crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2];
	struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state;
	unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR);
	int num_active_planes = hweight32(active_planes);
	const int fifo_size = 511;
	int fifo_extra, fifo_left = fifo_size;
	int sprite0_fifo_extra = 0;
	unsigned int total_rate;
	enum plane_id plane_id;

	/*
	 * When enabling sprite0 after sprite1 has already been enabled
	 * we tend to get an underrun unless sprite0 already has some
	 * FIFO space allocated. Hence we always allocate at least one
	 * cacheline for sprite0 whenever sprite1 is enabled.
	 *
	 * All other plane enable sequences appear immune to this problem.
	 */
	if (vlv_need_sprite0_fifo_workaround(active_planes))
		sprite0_fifo_extra = 1;

	total_rate = raw->plane[PLANE_PRIMARY] +
		raw->plane[PLANE_SPRITE0] +
		raw->plane[PLANE_SPRITE1] +
		sprite0_fifo_extra;

	if (total_rate > fifo_size)
		return -EINVAL;

	if (total_rate == 0)
		total_rate = 1;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		unsigned int rate;

		if ((active_planes & BIT(plane_id)) == 0) {
			fifo_state->plane[plane_id] = 0;
			continue;
		}

		rate = raw->plane[plane_id];
		fifo_state->plane[plane_id] = fifo_size * rate / total_rate;
		fifo_left -= fifo_state->plane[plane_id];
	}

	fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra;
	fifo_left -= sprite0_fifo_extra;

	fifo_state->plane[PLANE_CURSOR] = 63;

	fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1);

	/* spread the remainder evenly */
	for_each_plane_id_on_crtc(crtc, plane_id) {
		int plane_extra;

		if (fifo_left == 0)
			break;

		if ((active_planes & BIT(plane_id)) == 0)
			continue;

		plane_extra = min(fifo_extra, fifo_left);
		fifo_state->plane[plane_id] += plane_extra;
		fifo_left -= plane_extra;
	}

	WARN_ON(active_planes != 0 && fifo_left != 0);

	/* give it all to the first plane if none are active */
	if (active_planes == 0) {
		WARN_ON(fifo_left != fifo_size);
		fifo_state->plane[PLANE_PRIMARY] = fifo_left;
	}

	return 0;
}
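
/*
 * Worked example (added for illustration, hypothetical numbers): with only
 * the primary plane and sprite0 active and raw PM2 watermarks of 200 and
 * 100 respectively, total_rate = 300 and the 511-entry FIFO is split
 * proportionally into 511 * 200 / 300 = 340 and 511 * 100 / 300 = 170
 * entries; the single left-over entry is then handed out by the "spread the
 * remainder evenly" loop. The cursor always keeps its fixed 63 entries.
 */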

static int vlv_num_wm_levels(struct drm_i915_private *dev_priv)
{
	return dev_priv->wm.max_level + 1;
}

/* mark all levels starting from 'level' as invalid */
static void vlv_invalidate_wms(struct intel_crtc *crtc,
			       struct vlv_wm_state *wm_state, int level)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);

	for (; level < vlv_num_wm_levels(dev_priv); level++) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id)
			wm_state->wm[level].plane[plane_id] = USHRT_MAX;

		wm_state->sr[level].cursor = USHRT_MAX;
		wm_state->sr[level].plane = USHRT_MAX;
	}
}

static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size)
{
	if (wm > fifo_size)
		return USHRT_MAX;
	else
		return fifo_size - wm;
}

/*
 * Starting from 'level' set all higher
 * levels to 'value' in the "raw" watermarks.
 */
static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state,
				 int level, enum plane_id plane_id, u16 value)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	int num_levels = vlv_num_wm_levels(dev_priv);
	bool dirty = false;

	for (; level < num_levels; level++) {
		struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];

		dirty |= raw->plane[plane_id] != value;
		raw->plane[plane_id] = value;
	}

	return dirty;
}

static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state,
				 const struct intel_plane_state *plane_state)
{
	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
	enum plane_id plane_id = plane->id;
	int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev));
	int level;
	bool dirty = false;

	if (!plane_state->base.visible) {
		dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0);
		goto out;
	}

	for (level = 0; level < num_levels; level++) {
		struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
		int wm = vlv_compute_wm_level(crtc_state, plane_state, level);
		int max_wm = plane_id == PLANE_CURSOR ? 63 : 511;

		if (wm > max_wm)
			break;

		dirty |= raw->plane[plane_id] != wm;
		raw->plane[plane_id] = wm;
	}

	/* mark all higher levels as invalid */
	dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX);

out:
	if (dirty)
		DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n",
			      plane->base.name,
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id],
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id],
			      crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]);

	return dirty;
}

static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state,
				  enum plane_id plane_id, int level)
{
	const struct vlv_pipe_wm *raw =
		&crtc_state->wm.vlv.raw[level];
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;

	return raw->plane[plane_id] <= fifo_state->plane[plane_id];
}

static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level)
{
	return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) &&
		vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) &&
		vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) &&
		vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level);
}

static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	struct intel_atomic_state *state =
		to_intel_atomic_state(crtc_state->base.state);
	struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal;
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;
	int num_active_planes = hweight32(crtc_state->active_planes &
					  ~BIT(PLANE_CURSOR));
	bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base);
	struct intel_plane_state *plane_state;
	struct intel_plane *plane;
	enum plane_id plane_id;
	int level, ret, i;
	unsigned int dirty = 0;

	for_each_intel_plane_in_state(state, plane, plane_state, i) {
		const struct intel_plane_state *old_plane_state =
			to_intel_plane_state(plane->base.state);

		if (plane_state->base.crtc != &crtc->base &&
		    old_plane_state->base.crtc != &crtc->base)
			continue;

		if (vlv_plane_wm_compute(crtc_state, plane_state))
			dirty |= BIT(plane->id);
	}

	/*
	 * DSPARB registers may have been reset due to the
	 * power well being turned off. Make sure we restore
	 * them to a consistent state even if no primary/sprite
	 * planes are initially active.
	 */
	if (needs_modeset)
		crtc_state->fifo_changed = true;

	if (!dirty)
		return 0;

	/* cursor changes don't warrant a FIFO recompute */
	if (dirty & ~BIT(PLANE_CURSOR)) {
		const struct intel_crtc_state *old_crtc_state =
			to_intel_crtc_state(crtc->base.state);
		const struct vlv_fifo_state *old_fifo_state =
			&old_crtc_state->wm.vlv.fifo_state;

		ret = vlv_compute_fifo(crtc_state);
		if (ret)
			return ret;

		if (needs_modeset ||
		    memcmp(old_fifo_state, fifo_state,
			   sizeof(*fifo_state)) != 0)
			crtc_state->fifo_changed = true;
	}

	/* initially allow all levels */
	wm_state->num_levels = vlv_num_wm_levels(dev_priv);
	/*
	 * Note that enabling cxsr with no primary/sprite planes
	 * enabled can wedge the pipe. Hence we only allow cxsr
	 * with exactly one enabled primary/sprite plane.
	 */
	wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1;

	for (level = 0; level < wm_state->num_levels; level++) {
		const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level];
		const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1;

		if (!vlv_crtc_wm_is_valid(crtc_state, level))
			break;

		for_each_plane_id_on_crtc(crtc, plane_id) {
			wm_state->wm[level].plane[plane_id] =
				vlv_invert_wm_value(raw->plane[plane_id],
						    fifo_state->plane[plane_id]);
		}

		wm_state->sr[level].plane =
			vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY],
						 raw->plane[PLANE_SPRITE0],
						 raw->plane[PLANE_SPRITE1]),
					    sr_fifo_size);

		wm_state->sr[level].cursor =
			vlv_invert_wm_value(raw->plane[PLANE_CURSOR],
					    63);
	}

	if (level == 0)
		return -EINVAL;

	/* limit to only levels we can actually handle */
	wm_state->num_levels = level;

	/* invalidate the higher levels */
	vlv_invalidate_wms(crtc, wm_state, level);

	return 0;
}

#define VLV_FIFO(plane, value) \
	(((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV)

static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	const struct vlv_fifo_state *fifo_state =
		&crtc_state->wm.vlv.fifo_state;
	int sprite0_start, sprite1_start, fifo_size;

	if (!crtc_state->fifo_changed)
		return;

	sprite0_start = fifo_state->plane[PLANE_PRIMARY];
	sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start;
	fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start;

	WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63);
	WARN_ON(fifo_size != 511);

	trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size);

	/*
	 * uncore.lock serves a double purpose here. It allows us to
	 * use the less expensive I915_{READ,WRITE}_FW() functions, and
	 * it protects the DSPARB registers from getting clobbered by
	 * parallel updates from multiple pipes.
	 *
	 * intel_pipe_update_start() has already disabled interrupts
	 * for us, so a plain spin_lock() is sufficient here.
	 */
	lockmgr(&dev_priv->uncore.lock, LK_EXCLUSIVE);

	switch (crtc->pipe) {
		uint32_t dsparb, dsparb2, dsparb3;
	case PIPE_A:
		dsparb = I915_READ_FW(DSPARB);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
			    VLV_FIFO(SPRITEB, 0xff));
		dsparb |= (VLV_FIFO(SPRITEA, sprite0_start) |
			   VLV_FIFO(SPRITEB, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEA_HI, 0x1) |
			     VLV_FIFO(SPRITEB_HI, 0x1));
		dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB, dsparb);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	case PIPE_B:
		dsparb = I915_READ_FW(DSPARB);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
			    VLV_FIFO(SPRITED, 0xff));
		dsparb |= (VLV_FIFO(SPRITEC, sprite0_start) |
			   VLV_FIFO(SPRITED, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEC_HI, 0xff) |
			     VLV_FIFO(SPRITED_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITED_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB, dsparb);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	case PIPE_C:
		dsparb3 = I915_READ_FW(DSPARB3);
		dsparb2 = I915_READ_FW(DSPARB2);

		dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
			     VLV_FIFO(SPRITEF, 0xff));
		dsparb3 |= (VLV_FIFO(SPRITEE, sprite0_start) |
			    VLV_FIFO(SPRITEF, sprite1_start));

		dsparb2 &= ~(VLV_FIFO(SPRITEE_HI, 0xff) |
			     VLV_FIFO(SPRITEF_HI, 0xff));
		dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
			    VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));

		I915_WRITE_FW(DSPARB3, dsparb3);
		I915_WRITE_FW(DSPARB2, dsparb2);
		break;
	default:
		break;
	}

	POSTING_READ_FW(DSPARB);

	lockmgr(&dev_priv->uncore.lock, LK_RELEASE);
}

#undef VLV_FIFO

static int vlv_compute_intermediate_wm(struct drm_device *dev,
				       struct intel_crtc *crtc,
				       struct intel_crtc_state *crtc_state)
{
	struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate;
	const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal;
	const struct vlv_wm_state *active = &crtc->wm.active.vlv;
	int level;

	intermediate->num_levels = min(optimal->num_levels, active->num_levels);
	intermediate->cxsr = optimal->cxsr && active->cxsr &&
		!crtc_state->disable_cxsr;

	for (level = 0; level < intermediate->num_levels; level++) {
		enum plane_id plane_id;

		for_each_plane_id_on_crtc(crtc, plane_id) {
			intermediate->wm[level].plane[plane_id] =
				min(optimal->wm[level].plane[plane_id],
				    active->wm[level].plane[plane_id]);
		}

		intermediate->sr[level].plane = min(optimal->sr[level].plane,
						    active->sr[level].plane);
		intermediate->sr[level].cursor = min(optimal->sr[level].cursor,
						     active->sr[level].cursor);
	}

	vlv_invalidate_wms(crtc, intermediate, level);

	/*
	 * If our intermediate WM are identical to the final WM, then we can
	 * omit the post-vblank programming; only update if it's different.
	 */
	if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0)
		crtc_state->wm.need_postvbl_update = true;

	return 0;
}

static void vlv_merge_wm(struct drm_i915_private *dev_priv,
			 struct vlv_wm_values *wm)
{
	struct intel_crtc *crtc;
	int num_active_crtcs = 0;

	wm->level = dev_priv->wm.max_level;
	wm->cxsr = true;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;

		if (!crtc->active)
			continue;

		if (!wm_state->cxsr)
			wm->cxsr = false;

		num_active_crtcs++;
		wm->level = min_t(int, wm->level, wm_state->num_levels - 1);
	}

	if (num_active_crtcs != 1)
		wm->cxsr = false;

	if (num_active_crtcs > 1)
		wm->level = VLV_WM_LEVEL_PM2;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv;
		enum i915_pipe pipe = crtc->pipe;

		wm->pipe[pipe] = wm_state->wm[wm->level];
		if (crtc->active && wm->cxsr)
			wm->sr = wm_state->sr[wm->level];

		wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_SPRITE0] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_SPRITE1] = DDL_PRECISION_HIGH | 2;
		wm->ddl[pipe].plane[PLANE_CURSOR] = DDL_PRECISION_HIGH | 2;
	}
}

static bool is_disabling(int old, int new, int threshold)
{
	return old >= threshold && new < threshold;
}

static bool is_enabling(int old, int new, int threshold)
{
	return old < threshold && new >= threshold;
}

static void vlv_program_watermarks(struct drm_i915_private *dev_priv)
{
	struct vlv_wm_values *old_wm = &dev_priv->wm.vlv;
	struct vlv_wm_values new_wm = {};

	vlv_merge_wm(dev_priv, &new_wm);

	if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0)
		return;

	if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
		chv_set_memory_dvfs(dev_priv, false);

	if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
		chv_set_memory_pm5(dev_priv, false);

	if (is_disabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, false);

	vlv_write_wm_values(dev_priv, &new_wm);

	if (is_enabling(old_wm->cxsr, new_wm.cxsr, true))
		_intel_set_memory_cxsr(dev_priv, true);

	if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_PM5))
		chv_set_memory_pm5(dev_priv, true);

	if (is_enabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS))
		chv_set_memory_dvfs(dev_priv, true);

	*old_wm = new_wm;
}

static void vlv_initial_watermarks(struct intel_atomic_state *state,
				   struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);

	mutex_lock(&dev_priv->wm.wm_mutex);
	crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate;
	vlv_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

static void vlv_optimize_watermarks(struct intel_atomic_state *state,
				    struct intel_crtc_state *crtc_state)
{
	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);

	if (!crtc_state->wm.need_postvbl_update)
		return;

	mutex_lock(&dev_priv->wm.wm_mutex);
	intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal;
	vlv_program_watermarks(dev_priv);
	mutex_unlock(&dev_priv->wm.wm_mutex);
}

#define single_plane_enabled(mask) is_power_of_2(mask)

static void g4x_update_wm(struct intel_crtc *crtc)
{
	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
	static const int sr_latency_ns = 12000;
	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
	int plane_sr, cursor_sr;
	unsigned int enabled = 0;
	bool cxsr_enabled;

	if (g4x_compute_wm0(dev_priv, PIPE_A,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planea_wm, &cursora_wm))
		enabled |= 1 << PIPE_A;

	if (g4x_compute_wm0(dev_priv, PIPE_B,
			    &g4x_wm_info, pessimal_latency_ns,
			    &g4x_cursor_wm_info, pessimal_latency_ns,
			    &planeb_wm, &cursorb_wm))
		enabled |= 1 << PIPE_B;

	if (single_plane_enabled(enabled) &&
	    g4x_compute_srwm(dev_priv, ffs(enabled) - 1,
			     sr_latency_ns,
			     &g4x_wm_info,
			     &g4x_cursor_wm_info,
			     &plane_sr, &cursor_sr)) {
		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		intel_set_memory_cxsr(dev_priv, false);
		plane_sr = cursor_sr = 0;
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
		      planea_wm, cursora_wm,
		      planeb_wm, cursorb_wm,
		      plane_sr, cursor_sr);

	I915_WRITE(DSPFW1,
		   FW_WM(plane_sr, SR) |
		   FW_WM(cursorb_wm, CURSORB) |
		   FW_WM(planeb_wm, PLANEB) |
		   FW_WM(planea_wm, PLANEA));
	I915_WRITE(DSPFW2,
		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
		   FW_WM(cursora_wm, CURSORA));
	/* HPLL off in SR has some issues on G4x... disable it */
	I915_WRITE(DSPFW3,
		   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
		   FW_WM(cursor_sr, CURSOR_SR));

	if (cxsr_enabled)
		intel_set_memory_cxsr(dev_priv, true);
}

static void i965_update_wm(struct intel_crtc *unused_crtc)
{
	struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
	struct intel_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;
	bool cxsr_enabled;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev_priv);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		const struct drm_display_mode *adjusted_mode =
			&crtc->config->base.adjusted_mode;
		const struct drm_framebuffer *fb =
			crtc->base.primary->state->fb;
		int clock = adjusted_mode->crtc_clock;
		int htotal = adjusted_mode->crtc_htotal;
		int hdisplay = crtc->config->pipe_src_w;
		int cpp = fb->format->cpp[0];
		unsigned long line_time_us;
		int entries;

		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * hdisplay;
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			cpp * crtc->base.cursor->state->crtc_w;
		entries = DIV_ROUND_UP(entries,
				       i965_cursor_wm_info.cacheline_size);
		cursor_sr = i965_cursor_wm_info.fifo_size -
			(entries + i965_cursor_wm_info.guard_size);

		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		cxsr_enabled = true;
	} else {
		cxsr_enabled = false;
		/* Turn off self refresh if both pipes are enabled */
		intel_set_memory_cxsr(dev_priv, false);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);

	/* 965 has limitations... */
*/ 1731 I915_WRITE(DSPFW1, FW_WM(srwm, SR) | 1732 FW_WM(8, CURSORB) | 1733 FW_WM(8, PLANEB) | 1734 FW_WM(8, PLANEA)); 1735 I915_WRITE(DSPFW2, FW_WM(8, CURSORA) | 1736 FW_WM(8, PLANEC_OLD)); 1737 /* update cursor SR watermark */ 1738 I915_WRITE(DSPFW3, FW_WM(cursor_sr, CURSOR_SR)); 1739 1740 if (cxsr_enabled) 1741 intel_set_memory_cxsr(dev_priv, true); 1742 } 1743 1744 #undef FW_WM 1745 1746 static void i9xx_update_wm(struct intel_crtc *unused_crtc) 1747 { 1748 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 1749 const struct intel_watermark_params *wm_info; 1750 uint32_t fwater_lo; 1751 uint32_t fwater_hi; 1752 int cwm, srwm = 1; 1753 int fifo_size; 1754 int planea_wm, planeb_wm; 1755 struct intel_crtc *crtc, *enabled = NULL; 1756 1757 if (IS_I945GM(dev_priv)) 1758 wm_info = &i945_wm_info; 1759 else if (!IS_GEN2(dev_priv)) 1760 wm_info = &i915_wm_info; 1761 else 1762 wm_info = &i830_a_wm_info; 1763 1764 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0); 1765 crtc = intel_get_crtc_for_plane(dev_priv, 0); 1766 if (intel_crtc_active(crtc)) { 1767 const struct drm_display_mode *adjusted_mode = 1768 &crtc->config->base.adjusted_mode; 1769 const struct drm_framebuffer *fb = 1770 crtc->base.primary->state->fb; 1771 int cpp; 1772 1773 if (IS_GEN2(dev_priv)) 1774 cpp = 4; 1775 else 1776 cpp = fb->format->cpp[0]; 1777 1778 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1779 wm_info, fifo_size, cpp, 1780 pessimal_latency_ns); 1781 enabled = crtc; 1782 } else { 1783 planea_wm = fifo_size - wm_info->guard_size; 1784 if (planea_wm > (long)wm_info->max_wm) 1785 planea_wm = wm_info->max_wm; 1786 } 1787 1788 if (IS_GEN2(dev_priv)) 1789 wm_info = &i830_bc_wm_info; 1790 1791 fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1); 1792 crtc = intel_get_crtc_for_plane(dev_priv, 1); 1793 if (intel_crtc_active(crtc)) { 1794 const struct drm_display_mode *adjusted_mode = 1795 &crtc->config->base.adjusted_mode; 1796 const struct drm_framebuffer *fb = 1797 crtc->base.primary->state->fb; 1798 int cpp; 1799 1800 if (IS_GEN2(dev_priv)) 1801 cpp = 4; 1802 else 1803 cpp = fb->format->cpp[0]; 1804 1805 planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1806 wm_info, fifo_size, cpp, 1807 pessimal_latency_ns); 1808 if (enabled == NULL) 1809 enabled = crtc; 1810 else 1811 enabled = NULL; 1812 } else { 1813 planeb_wm = fifo_size - wm_info->guard_size; 1814 if (planeb_wm > (long)wm_info->max_wm) 1815 planeb_wm = wm_info->max_wm; 1816 } 1817 1818 DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); 1819 1820 if (IS_I915GM(dev_priv) && enabled) { 1821 struct drm_i915_gem_object *obj; 1822 1823 obj = intel_fb_obj(enabled->base.primary->state->fb); 1824 1825 /* self-refresh seems busted with untiled */ 1826 if (!i915_gem_object_is_tiled(obj)) 1827 enabled = NULL; 1828 } 1829 1830 /* 1831 * Overlay gets an aggressive default since video jitter is bad. 1832 */ 1833 cwm = 2; 1834 1835 /* Play safe and disable self-refresh before adjusting watermarks. 
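 * Self-refresh is re-enabled at the end of this function only when exactly
 * one pipe remains usable ('enabled' is cleared above when both pipes are
 * active, or on i915GM when the framebuffer is untiled).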
*/ 1836 intel_set_memory_cxsr(dev_priv, false); 1837 1838 /* Calc sr entries for one plane configs */ 1839 if (HAS_FW_BLC(dev_priv) && enabled) { 1840 /* self-refresh has much higher latency */ 1841 static const int sr_latency_ns = 6000; 1842 const struct drm_display_mode *adjusted_mode = 1843 &enabled->config->base.adjusted_mode; 1844 const struct drm_framebuffer *fb = 1845 enabled->base.primary->state->fb; 1846 int clock = adjusted_mode->crtc_clock; 1847 int htotal = adjusted_mode->crtc_htotal; 1848 int hdisplay = enabled->config->pipe_src_w; 1849 int cpp; 1850 unsigned long line_time_us; 1851 int entries; 1852 1853 if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) 1854 cpp = 4; 1855 else 1856 cpp = fb->format->cpp[0]; 1857 1858 line_time_us = max(htotal * 1000 / clock, 1); 1859 1860 /* Use ns/us then divide to preserve precision */ 1861 entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * 1862 cpp * hdisplay; 1863 entries = DIV_ROUND_UP(entries, wm_info->cacheline_size); 1864 DRM_DEBUG_KMS("self-refresh entries: %d\n", entries); 1865 srwm = wm_info->fifo_size - entries; 1866 if (srwm < 0) 1867 srwm = 1; 1868 1869 if (IS_I945G(dev_priv) || IS_I945GM(dev_priv)) 1870 I915_WRITE(FW_BLC_SELF, 1871 FW_BLC_SELF_FIFO_MASK | (srwm & 0xff)); 1872 else 1873 I915_WRITE(FW_BLC_SELF, srwm & 0x3f); 1874 } 1875 1876 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n", 1877 planea_wm, planeb_wm, cwm, srwm); 1878 1879 fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f); 1880 fwater_hi = (cwm & 0x1f); 1881 1882 /* Set request length to 8 cachelines per fetch */ 1883 fwater_lo = fwater_lo | (1 << 24) | (1 << 8); 1884 fwater_hi = fwater_hi | (1 << 8); 1885 1886 I915_WRITE(FW_BLC, fwater_lo); 1887 I915_WRITE(FW_BLC2, fwater_hi); 1888 1889 if (enabled) 1890 intel_set_memory_cxsr(dev_priv, true); 1891 } 1892 1893 static void i845_update_wm(struct intel_crtc *unused_crtc) 1894 { 1895 struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); 1896 struct intel_crtc *crtc; 1897 const struct drm_display_mode *adjusted_mode; 1898 uint32_t fwater_lo; 1899 int planea_wm; 1900 1901 crtc = single_enabled_crtc(dev_priv); 1902 if (crtc == NULL) 1903 return; 1904 1905 adjusted_mode = &crtc->config->base.adjusted_mode; 1906 planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, 1907 &i845_wm_info, 1908 dev_priv->display.get_fifo_size(dev_priv, 0), 1909 4, pessimal_latency_ns); 1910 fwater_lo = I915_READ(FW_BLC) & ~0xfff; 1911 fwater_lo |= (3<<8) | planea_wm; 1912 1913 DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm); 1914 1915 I915_WRITE(FW_BLC, fwater_lo); 1916 } 1917 1918 /* latency must be in 0.1us units. */ 1919 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) 1920 { 1921 uint64_t ret; 1922 1923 if (WARN(latency == 0, "Latency value missing\n")) 1924 return UINT_MAX; 1925 1926 ret = (uint64_t) pixel_rate * cpp * latency; 1927 ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2; 1928 1929 return ret; 1930 } 1931 1932 /* latency must be in 0.1us units. 
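 * Illustrative example (values assumed, not from bspec): pixel_rate = 148500 kHz,
 * pipe_htotal = 2200, horiz_pixels = 1920, cpp = 4, latency = 40 (4.0 us):
 * (40 * 148500) / (2200 * 10000) = 0, (0 + 1) * 1920 * 4 = 7680,
 * DIV_ROUND_UP(7680, 64) + 2 = 122 blocks.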
*/ 1933 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, 1934 uint32_t horiz_pixels, uint8_t cpp, 1935 uint32_t latency) 1936 { 1937 uint32_t ret; 1938 1939 if (WARN(latency == 0, "Latency value missing\n")) 1940 return UINT_MAX; 1941 if (WARN_ON(!pipe_htotal)) 1942 return UINT_MAX; 1943 1944 ret = (latency * pixel_rate) / (pipe_htotal * 10000); 1945 ret = (ret + 1) * horiz_pixels * cpp; 1946 ret = DIV_ROUND_UP(ret, 64) + 2; 1947 return ret; 1948 } 1949 1950 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, 1951 uint8_t cpp) 1952 { 1953 /* 1954 * Neither of these should be possible since this function shouldn't be 1955 * called if the CRTC is off or the plane is invisible. But let's be 1956 * extra paranoid to avoid a potential divide-by-zero if we screw up 1957 * elsewhere in the driver. 1958 */ 1959 if (WARN_ON(!cpp)) 1960 return 0; 1961 if (WARN_ON(!horiz_pixels)) 1962 return 0; 1963 1964 return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2; 1965 } 1966 1967 struct ilk_wm_maximums { 1968 uint16_t pri; 1969 uint16_t spr; 1970 uint16_t cur; 1971 uint16_t fbc; 1972 }; 1973 1974 /* 1975 * For both WM_PIPE and WM_LP. 1976 * mem_value must be in 0.1us units. 1977 */ 1978 static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, 1979 const struct intel_plane_state *pstate, 1980 uint32_t mem_value, 1981 bool is_lp) 1982 { 1983 uint32_t method1, method2; 1984 int cpp; 1985 1986 if (!intel_wm_plane_visible(cstate, pstate)) 1987 return 0; 1988 1989 cpp = pstate->base.fb->format->cpp[0]; 1990 1991 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); 1992 1993 if (!is_lp) 1994 return method1; 1995 1996 method2 = ilk_wm_method2(cstate->pixel_rate, 1997 cstate->base.adjusted_mode.crtc_htotal, 1998 drm_rect_width(&pstate->base.dst), 1999 cpp, mem_value); 2000 2001 return min(method1, method2); 2002 } 2003 2004 /* 2005 * For both WM_PIPE and WM_LP. 2006 * mem_value must be in 0.1us units. 2007 */ 2008 static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, 2009 const struct intel_plane_state *pstate, 2010 uint32_t mem_value) 2011 { 2012 uint32_t method1, method2; 2013 int cpp; 2014 2015 if (!intel_wm_plane_visible(cstate, pstate)) 2016 return 0; 2017 2018 cpp = pstate->base.fb->format->cpp[0]; 2019 2020 method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); 2021 method2 = ilk_wm_method2(cstate->pixel_rate, 2022 cstate->base.adjusted_mode.crtc_htotal, 2023 drm_rect_width(&pstate->base.dst), 2024 cpp, mem_value); 2025 return min(method1, method2); 2026 } 2027 2028 /* 2029 * For both WM_PIPE and WM_LP. 2030 * mem_value must be in 0.1us units. 2031 */ 2032 static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, 2033 const struct intel_plane_state *pstate, 2034 uint32_t mem_value) 2035 { 2036 int cpp; 2037 2038 if (!intel_wm_plane_visible(cstate, pstate)) 2039 return 0; 2040 2041 cpp = pstate->base.fb->format->cpp[0]; 2042 2043 return ilk_wm_method2(cstate->pixel_rate, 2044 cstate->base.adjusted_mode.crtc_htotal, 2045 pstate->base.crtc_w, cpp, mem_value); 2046 } 2047 2048 /* Only for WM_LP. 
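 * Illustrative example (values assumed): pri_val = 122, plane width = 1920,
 * cpp = 4: DIV_ROUND_UP(122 * 64, 1920 * 4) + 2 = DIV_ROUND_UP(7808, 7680) + 2
 * = 4 FBC lines.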
*/ 2049 static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate, 2050 const struct intel_plane_state *pstate, 2051 uint32_t pri_val) 2052 { 2053 int cpp; 2054 2055 if (!intel_wm_plane_visible(cstate, pstate)) 2056 return 0; 2057 2058 cpp = pstate->base.fb->format->cpp[0]; 2059 2060 return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->base.dst), cpp); 2061 } 2062 2063 static unsigned int 2064 ilk_display_fifo_size(const struct drm_i915_private *dev_priv) 2065 { 2066 if (INTEL_GEN(dev_priv) >= 8) 2067 return 3072; 2068 else if (INTEL_GEN(dev_priv) >= 7) 2069 return 768; 2070 else 2071 return 512; 2072 } 2073 2074 static unsigned int 2075 ilk_plane_wm_reg_max(const struct drm_i915_private *dev_priv, 2076 int level, bool is_sprite) 2077 { 2078 if (INTEL_GEN(dev_priv) >= 8) 2079 /* BDW primary/sprite plane watermarks */ 2080 return level == 0 ? 255 : 2047; 2081 else if (INTEL_GEN(dev_priv) >= 7) 2082 /* IVB/HSW primary/sprite plane watermarks */ 2083 return level == 0 ? 127 : 1023; 2084 else if (!is_sprite) 2085 /* ILK/SNB primary plane watermarks */ 2086 return level == 0 ? 127 : 511; 2087 else 2088 /* ILK/SNB sprite plane watermarks */ 2089 return level == 0 ? 63 : 255; 2090 } 2091 2092 static unsigned int 2093 ilk_cursor_wm_reg_max(const struct drm_i915_private *dev_priv, int level) 2094 { 2095 if (INTEL_GEN(dev_priv) >= 7) 2096 return level == 0 ? 63 : 255; 2097 else 2098 return level == 0 ? 31 : 63; 2099 } 2100 2101 static unsigned int ilk_fbc_wm_reg_max(const struct drm_i915_private *dev_priv) 2102 { 2103 if (INTEL_GEN(dev_priv) >= 8) 2104 return 31; 2105 else 2106 return 15; 2107 } 2108 2109 #pragma GCC diagnostic ignored "-Wdiscarded-qualifiers" 2110 2111 /* Calculate the maximum primary/sprite plane watermark */ 2112 static unsigned int ilk_plane_wm_max(const struct drm_device *dev, 2113 int level, 2114 const struct intel_wm_config *config, 2115 enum intel_ddb_partitioning ddb_partitioning, 2116 bool is_sprite) 2117 { 2118 struct drm_i915_private *dev_priv = to_i915(dev); 2119 unsigned int fifo_size = ilk_display_fifo_size(dev_priv); 2120 2121 /* if sprites aren't enabled, sprites get nothing */ 2122 if (is_sprite && !config->sprites_enabled) 2123 return 0; 2124 2125 /* HSW allows LP1+ watermarks even with multiple pipes */ 2126 if (level == 0 || config->num_pipes_active > 1) { 2127 fifo_size /= INTEL_INFO(dev_priv)->num_pipes; 2128 2129 /* 2130 * For some reason the non self refresh 2131 * FIFO size is only half of the self 2132 * refresh FIFO size on ILK/SNB. 
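 * Illustrative example (assumed config): ILK with its 512 block FIFO and
 * 2 pipes at level 0: 512 / 2 = 256 per pipe, halved again below to 128
 * blocks, before the sprite split and the register clamp that follow.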
2133 */ 2134 if (INTEL_GEN(dev_priv) <= 6) 2135 fifo_size /= 2; 2136 } 2137 2138 if (config->sprites_enabled) { 2139 /* level 0 is always calculated with 1:1 split */ 2140 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) { 2141 if (is_sprite) 2142 fifo_size *= 5; 2143 fifo_size /= 6; 2144 } else { 2145 fifo_size /= 2; 2146 } 2147 } 2148 2149 /* clamp to max that the registers can hold */ 2150 return min(fifo_size, ilk_plane_wm_reg_max(dev_priv, level, is_sprite)); 2151 } 2152 2153 /* Calculate the maximum cursor plane watermark */ 2154 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, 2155 int level, 2156 const struct intel_wm_config *config) 2157 { 2158 /* HSW LP1+ watermarks w/ multiple pipes */ 2159 if (level > 0 && config->num_pipes_active > 1) 2160 return 64; 2161 2162 /* otherwise just report max that registers can hold */ 2163 return ilk_cursor_wm_reg_max(to_i915(dev), level); 2164 } 2165 2166 static void ilk_compute_wm_maximums(const struct drm_device *dev, 2167 int level, 2168 const struct intel_wm_config *config, 2169 enum intel_ddb_partitioning ddb_partitioning, 2170 struct ilk_wm_maximums *max) 2171 { 2172 max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); 2173 max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); 2174 max->cur = ilk_cursor_wm_max(dev, level, config); 2175 max->fbc = ilk_fbc_wm_reg_max(to_i915(dev)); 2176 } 2177 2178 static void ilk_compute_wm_reg_maximums(const struct drm_i915_private *dev_priv, 2179 int level, 2180 struct ilk_wm_maximums *max) 2181 { 2182 max->pri = ilk_plane_wm_reg_max(dev_priv, level, false); 2183 max->spr = ilk_plane_wm_reg_max(dev_priv, level, true); 2184 max->cur = ilk_cursor_wm_reg_max(dev_priv, level); 2185 max->fbc = ilk_fbc_wm_reg_max(dev_priv); 2186 } 2187 2188 static bool ilk_validate_wm_level(int level, 2189 const struct ilk_wm_maximums *max, 2190 struct intel_wm_level *result) 2191 { 2192 bool ret; 2193 2194 /* already determined to be invalid? */ 2195 if (!result->enable) 2196 return false; 2197 2198 result->enable = result->pri_val <= max->pri && 2199 result->spr_val <= max->spr && 2200 result->cur_val <= max->cur; 2201 2202 ret = result->enable; 2203 2204 /* 2205 * HACK until we can pre-compute everything, 2206 * and thus fail gracefully if LP0 watermarks 2207 * are exceeded... 
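 * For now the offending LP0 values are clamped to the provided maximums and
 * the level is force-enabled so the modeset can proceed; the debug messages
 * below record which value overflowed.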
2208 */ 2209 if (level == 0 && !result->enable) { 2210 if (result->pri_val > max->pri) 2211 DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n", 2212 level, result->pri_val, max->pri); 2213 if (result->spr_val > max->spr) 2214 DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n", 2215 level, result->spr_val, max->spr); 2216 if (result->cur_val > max->cur) 2217 DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n", 2218 level, result->cur_val, max->cur); 2219 2220 result->pri_val = min_t(uint32_t, result->pri_val, max->pri); 2221 result->spr_val = min_t(uint32_t, result->spr_val, max->spr); 2222 result->cur_val = min_t(uint32_t, result->cur_val, max->cur); 2223 result->enable = true; 2224 } 2225 2226 return ret; 2227 } 2228 2229 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv, 2230 const struct intel_crtc *intel_crtc, 2231 int level, 2232 struct intel_crtc_state *cstate, 2233 struct intel_plane_state *pristate, 2234 struct intel_plane_state *sprstate, 2235 struct intel_plane_state *curstate, 2236 struct intel_wm_level *result) 2237 { 2238 uint16_t pri_latency = dev_priv->wm.pri_latency[level]; 2239 uint16_t spr_latency = dev_priv->wm.spr_latency[level]; 2240 uint16_t cur_latency = dev_priv->wm.cur_latency[level]; 2241 2242 /* WM1+ latency values stored in 0.5us units */ 2243 if (level > 0) { 2244 pri_latency *= 5; 2245 spr_latency *= 5; 2246 cur_latency *= 5; 2247 } 2248 2249 if (pristate) { 2250 result->pri_val = ilk_compute_pri_wm(cstate, pristate, 2251 pri_latency, level); 2252 result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val); 2253 } 2254 2255 if (sprstate) 2256 result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency); 2257 2258 if (curstate) 2259 result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency); 2260 2261 result->enable = true; 2262 } 2263 2264 static uint32_t 2265 hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) 2266 { 2267 const struct intel_atomic_state *intel_state = 2268 to_intel_atomic_state(cstate->base.state); 2269 const struct drm_display_mode *adjusted_mode = 2270 &cstate->base.adjusted_mode; 2271 u32 linetime, ips_linetime; 2272 2273 if (!cstate->base.active) 2274 return 0; 2275 if (WARN_ON(adjusted_mode->crtc_clock == 0)) 2276 return 0; 2277 if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) 2278 return 0; 2279 2280 /* The WM values are computed based on how long it takes to fill a single 2281 * row at the given clock rate, multiplied by 8.
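 * Illustrative example (values assumed): crtc_htotal = 2200, crtc_clock =
 * 148500 kHz: DIV_ROUND_CLOSEST(2200 * 1000 * 8, 148500) = 119, i.e. a line
 * time of roughly 14.8 us expressed in 1/8 us units.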
2282 * */ 2283 linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2284 adjusted_mode->crtc_clock); 2285 ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, 2286 intel_state->cdclk.logical.cdclk); 2287 2288 return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | 2289 PIPE_WM_LINETIME_TIME(linetime); 2290 } 2291 2292 static void intel_read_wm_latency(struct drm_i915_private *dev_priv, 2293 uint16_t wm[8]) 2294 { 2295 if (IS_GEN9(dev_priv)) { 2296 uint32_t val; 2297 int ret, i; 2298 int level, max_level = ilk_wm_max_level(dev_priv); 2299 2300 /* read the first set of memory latencies[0:3] */ 2301 val = 0; /* data0 to be programmed to 0 for first set */ 2302 mutex_lock(&dev_priv->rps.hw_lock); 2303 ret = sandybridge_pcode_read(dev_priv, 2304 GEN9_PCODE_READ_MEM_LATENCY, 2305 &val); 2306 mutex_unlock(&dev_priv->rps.hw_lock); 2307 2308 if (ret) { 2309 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2310 return; 2311 } 2312 2313 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2314 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2315 GEN9_MEM_LATENCY_LEVEL_MASK; 2316 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2317 GEN9_MEM_LATENCY_LEVEL_MASK; 2318 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2319 GEN9_MEM_LATENCY_LEVEL_MASK; 2320 2321 /* read the second set of memory latencies[4:7] */ 2322 val = 1; /* data0 to be programmed to 1 for second set */ 2323 mutex_lock(&dev_priv->rps.hw_lock); 2324 ret = sandybridge_pcode_read(dev_priv, 2325 GEN9_PCODE_READ_MEM_LATENCY, 2326 &val); 2327 mutex_unlock(&dev_priv->rps.hw_lock); 2328 if (ret) { 2329 DRM_ERROR("SKL Mailbox read error = %d\n", ret); 2330 return; 2331 } 2332 2333 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; 2334 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & 2335 GEN9_MEM_LATENCY_LEVEL_MASK; 2336 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & 2337 GEN9_MEM_LATENCY_LEVEL_MASK; 2338 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & 2339 GEN9_MEM_LATENCY_LEVEL_MASK; 2340 2341 /* 2342 * If a level n (n > 1) has a 0us latency, all levels m (m >= n) 2343 * need to be disabled. We make sure to sanitize the values out 2344 * of the punit to satisfy this requirement. 2345 */ 2346 for (level = 1; level <= max_level; level++) { 2347 if (wm[level] == 0) { 2348 for (i = level + 1; i <= max_level; i++) 2349 wm[i] = 0; 2350 break; 2351 } 2352 } 2353 2354 /* 2355 * WaWmMemoryReadLatency:skl,glk 2356 * 2357 * punit doesn't take into account the read latency so we need 2358 * to add 2us to the various latency levels we retrieve from the 2359 * punit when level 0 response data us 0us. 
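 * For example (illustrative values), a punit response of {0, 4, 8, 16} is
 * adjusted below to {2, 6, 10, 18}; the sanitization loop above has already
 * zeroed any level that follows a zero latency.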
2360 */ 2361 if (wm[0] == 0) { 2362 wm[0] += 2; 2363 for (level = 1; level <= max_level; level++) { 2364 if (wm[level] == 0) 2365 break; 2366 wm[level] += 2; 2367 } 2368 } 2369 2370 } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 2371 uint64_t sskpd = I915_READ64(MCH_SSKPD); 2372 2373 wm[0] = (sskpd >> 56) & 0xFF; 2374 if (wm[0] == 0) 2375 wm[0] = sskpd & 0xF; 2376 wm[1] = (sskpd >> 4) & 0xFF; 2377 wm[2] = (sskpd >> 12) & 0xFF; 2378 wm[3] = (sskpd >> 20) & 0x1FF; 2379 wm[4] = (sskpd >> 32) & 0x1FF; 2380 } else if (INTEL_GEN(dev_priv) >= 6) { 2381 uint32_t sskpd = I915_READ(MCH_SSKPD); 2382 2383 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; 2384 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; 2385 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; 2386 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; 2387 } else if (INTEL_GEN(dev_priv) >= 5) { 2388 uint32_t mltr = I915_READ(MLTR_ILK); 2389 2390 /* ILK primary LP0 latency is 700 ns */ 2391 wm[0] = 7; 2392 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; 2393 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; 2394 } 2395 } 2396 2397 static void intel_fixup_spr_wm_latency(struct drm_i915_private *dev_priv, 2398 uint16_t wm[5]) 2399 { 2400 /* ILK sprite LP0 latency is 1300 ns */ 2401 if (IS_GEN5(dev_priv)) 2402 wm[0] = 13; 2403 } 2404 2405 static void intel_fixup_cur_wm_latency(struct drm_i915_private *dev_priv, 2406 uint16_t wm[5]) 2407 { 2408 /* ILK cursor LP0 latency is 1300 ns */ 2409 if (IS_GEN5(dev_priv)) 2410 wm[0] = 13; 2411 2412 /* WaDoubleCursorLP3Latency:ivb */ 2413 if (IS_IVYBRIDGE(dev_priv)) 2414 wm[3] *= 2; 2415 } 2416 2417 int ilk_wm_max_level(const struct drm_i915_private *dev_priv) 2418 { 2419 /* how many WM levels are we expecting */ 2420 if (INTEL_GEN(dev_priv) >= 9) 2421 return 7; 2422 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2423 return 4; 2424 else if (INTEL_GEN(dev_priv) >= 6) 2425 return 3; 2426 else 2427 return 2; 2428 } 2429 2430 static void intel_print_wm_latency(struct drm_i915_private *dev_priv, 2431 const char *name, 2432 const uint16_t wm[8]) 2433 { 2434 int level, max_level = ilk_wm_max_level(dev_priv); 2435 2436 for (level = 0; level <= max_level; level++) { 2437 unsigned int latency = wm[level]; 2438 2439 if (latency == 0) { 2440 DRM_ERROR("%s WM%d latency not provided\n", 2441 name, level); 2442 continue; 2443 } 2444 2445 /* 2446 * - latencies are in us on gen9. 2447 * - before then, WM1+ latency values are in 0.5us units 2448 */ 2449 if (IS_GEN9(dev_priv)) 2450 latency *= 10; 2451 else if (level > 0) 2452 latency *= 5; 2453 2454 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", 2455 name, level, wm[level], 2456 latency / 10, latency % 10); 2457 } 2458 } 2459 2460 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, 2461 uint16_t wm[5], uint16_t min) 2462 { 2463 int level, max_level = ilk_wm_max_level(dev_priv); 2464 2465 if (wm[0] >= min) 2466 return false; 2467 2468 wm[0] = max(wm[0], min); 2469 for (level = 1; level <= max_level; level++) 2470 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5)); 2471 2472 return true; 2473 } 2474 2475 static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) 2476 { 2477 bool changed; 2478 2479 /* 2480 * The BIOS provided WM memory latency values are often 2481 * inadequate for high resolution displays. Adjust them. 
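 * The value 12 used below raises WM0 to at least 1.2 us (stored in 0.1 us
 * units) and WM1+ to at least DIV_ROUND_UP(12, 5) = 3 units, i.e. 1.5 us,
 * since WM1+ latencies are kept in 0.5 us units.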
2482 */ 2483 changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | 2484 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | 2485 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); 2486 2487 if (!changed) 2488 return; 2489 2490 DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n"); 2491 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2492 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2493 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2494 } 2495 2496 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) 2497 { 2498 intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); 2499 2500 memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, 2501 sizeof(dev_priv->wm.pri_latency)); 2502 memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency, 2503 sizeof(dev_priv->wm.pri_latency)); 2504 2505 intel_fixup_spr_wm_latency(dev_priv, dev_priv->wm.spr_latency); 2506 intel_fixup_cur_wm_latency(dev_priv, dev_priv->wm.cur_latency); 2507 2508 intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency); 2509 intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency); 2510 intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); 2511 2512 if (IS_GEN6(dev_priv)) 2513 snb_wm_latency_quirk(dev_priv); 2514 } 2515 2516 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv) 2517 { 2518 intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); 2519 intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency); 2520 } 2521 2522 static bool ilk_validate_pipe_wm(struct drm_device *dev, 2523 struct intel_pipe_wm *pipe_wm) 2524 { 2525 /* LP0 watermark maximums depend on this pipe alone */ 2526 const struct intel_wm_config config = { 2527 .num_pipes_active = 1, 2528 .sprites_enabled = pipe_wm->sprites_enabled, 2529 .sprites_scaled = pipe_wm->sprites_scaled, 2530 }; 2531 struct ilk_wm_maximums max; 2532 2533 /* LP0 watermarks always use 1/2 DDB partitioning */ 2534 ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max); 2535 2536 /* At least LP0 must be valid */ 2537 if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0])) { 2538 DRM_DEBUG_KMS("LP0 watermark invalid\n"); 2539 return false; 2540 } 2541 2542 return true; 2543 } 2544 2545 /* Compute new watermarks for the pipe */ 2546 static int ilk_compute_pipe_wm(struct intel_crtc_state *cstate) 2547 { 2548 struct drm_atomic_state *state = cstate->base.state; 2549 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 2550 struct intel_pipe_wm *pipe_wm; 2551 struct drm_device *dev = state->dev; 2552 const struct drm_i915_private *dev_priv = to_i915(dev); 2553 struct intel_plane *intel_plane; 2554 struct intel_plane_state *pristate = NULL; 2555 struct intel_plane_state *sprstate = NULL; 2556 struct intel_plane_state *curstate = NULL; 2557 int level, max_level = ilk_wm_max_level(dev_priv), usable_level; 2558 struct ilk_wm_maximums max; 2559 2560 pipe_wm = &cstate->wm.ilk.optimal; 2561 2562 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 2563 struct intel_plane_state *ps; 2564 2565 ps = intel_atomic_get_existing_plane_state(state, 2566 intel_plane); 2567 if (!ps) 2568 continue; 2569 2570 if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY) 2571 pristate = ps; 2572 else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) 2573 sprstate = ps; 2574 else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR) 2575 curstate = ps; 
2576 } 2577 2578 pipe_wm->pipe_enabled = cstate->base.active; 2579 if (sprstate) { 2580 pipe_wm->sprites_enabled = sprstate->base.visible; 2581 pipe_wm->sprites_scaled = sprstate->base.visible && 2582 (drm_rect_width(&sprstate->base.dst) != drm_rect_width(&sprstate->base.src) >> 16 || 2583 drm_rect_height(&sprstate->base.dst) != drm_rect_height(&sprstate->base.src) >> 16); 2584 } 2585 2586 usable_level = max_level; 2587 2588 /* ILK/SNB: LP2+ watermarks only w/o sprites */ 2589 if (INTEL_GEN(dev_priv) <= 6 && pipe_wm->sprites_enabled) 2590 usable_level = 1; 2591 2592 /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */ 2593 if (pipe_wm->sprites_scaled) 2594 usable_level = 0; 2595 2596 ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, 2597 pristate, sprstate, curstate, &pipe_wm->raw_wm[0]); 2598 2599 memset(&pipe_wm->wm, 0, sizeof(pipe_wm->wm)); 2600 pipe_wm->wm[0] = pipe_wm->raw_wm[0]; 2601 2602 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2603 pipe_wm->linetime = hsw_compute_linetime_wm(cstate); 2604 2605 if (!ilk_validate_pipe_wm(dev, pipe_wm)) 2606 return -EINVAL; 2607 2608 ilk_compute_wm_reg_maximums(dev_priv, 1, &max); 2609 2610 for (level = 1; level <= max_level; level++) { 2611 struct intel_wm_level *wm = &pipe_wm->raw_wm[level]; 2612 2613 ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, 2614 pristate, sprstate, curstate, wm); 2615 2616 /* 2617 * Disable any watermark level that exceeds the 2618 * register maximums since such watermarks are 2619 * always invalid. 2620 */ 2621 if (level > usable_level) 2622 continue; 2623 2624 if (ilk_validate_wm_level(level, &max, wm)) 2625 pipe_wm->wm[level] = *wm; 2626 else 2627 usable_level = level; 2628 } 2629 2630 return 0; 2631 } 2632 2633 /* 2634 * Build a set of 'intermediate' watermark values that satisfy both the old 2635 * state and the new state. These can be programmed to the hardware 2636 * immediately. 2637 */ 2638 static int ilk_compute_intermediate_wm(struct drm_device *dev, 2639 struct intel_crtc *intel_crtc, 2640 struct intel_crtc_state *newstate) 2641 { 2642 struct intel_pipe_wm *a = &newstate->wm.ilk.intermediate; 2643 struct intel_pipe_wm *b = &intel_crtc->wm.active.ilk; 2644 int level, max_level = ilk_wm_max_level(to_i915(dev)); 2645 2646 /* 2647 * Start with the final, target watermarks, then combine with the 2648 * currently active watermarks to get values that are safe both before 2649 * and after the vblank. 2650 */ 2651 *a = newstate->wm.ilk.optimal; 2652 a->pipe_enabled |= b->pipe_enabled; 2653 a->sprites_enabled |= b->sprites_enabled; 2654 a->sprites_scaled |= b->sprites_scaled; 2655 2656 for (level = 0; level <= max_level; level++) { 2657 struct intel_wm_level *a_wm = &a->wm[level]; 2658 const struct intel_wm_level *b_wm = &b->wm[level]; 2659 2660 a_wm->enable &= b_wm->enable; 2661 a_wm->pri_val = max(a_wm->pri_val, b_wm->pri_val); 2662 a_wm->spr_val = max(a_wm->spr_val, b_wm->spr_val); 2663 a_wm->cur_val = max(a_wm->cur_val, b_wm->cur_val); 2664 a_wm->fbc_val = max(a_wm->fbc_val, b_wm->fbc_val); 2665 } 2666 2667 /* 2668 * We need to make sure that these merged watermark values are 2669 * actually a valid configuration themselves. If they're not, 2670 * there's no safe way to transition from the old state to 2671 * the new state, so we need to fail the atomic transaction. 2672 */ 2673 if (!ilk_validate_pipe_wm(dev, a)) 2674 return -EINVAL; 2675 2676 /* 2677 * If our intermediate WM are identical to the final WM, then we can 2678 * omit the post-vblank programming; only update if it's different. 
2679 */ 2680 if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) 2681 newstate->wm.need_postvbl_update = true; 2682 2683 return 0; 2684 } 2685 2686 /* 2687 * Merge the watermarks from all active pipes for a specific level. 2688 */ 2689 static void ilk_merge_wm_level(struct drm_device *dev, 2690 int level, 2691 struct intel_wm_level *ret_wm) 2692 { 2693 const struct intel_crtc *intel_crtc; 2694 2695 ret_wm->enable = true; 2696 2697 for_each_intel_crtc(dev, intel_crtc) { 2698 const struct intel_pipe_wm *active = &intel_crtc->wm.active.ilk; 2699 const struct intel_wm_level *wm = &active->wm[level]; 2700 2701 if (!active->pipe_enabled) 2702 continue; 2703 2704 /* 2705 * The watermark values may have been used in the past, 2706 * so we must maintain them in the registers for some 2707 * time even if the level is now disabled. 2708 */ 2709 if (!wm->enable) 2710 ret_wm->enable = false; 2711 2712 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val); 2713 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val); 2714 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val); 2715 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val); 2716 } 2717 } 2718 2719 /* 2720 * Merge all low power watermarks for all active pipes. 2721 */ 2722 static void ilk_wm_merge(struct drm_device *dev, 2723 const struct intel_wm_config *config, 2724 const struct ilk_wm_maximums *max, 2725 struct intel_pipe_wm *merged) 2726 { 2727 struct drm_i915_private *dev_priv = to_i915(dev); 2728 int level, max_level = ilk_wm_max_level(dev_priv); 2729 int last_enabled_level = max_level; 2730 2731 /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */ 2732 if ((INTEL_GEN(dev_priv) <= 6 || IS_IVYBRIDGE(dev_priv)) && 2733 config->num_pipes_active > 1) 2734 last_enabled_level = 0; 2735 2736 /* ILK: FBC WM must be disabled always */ 2737 merged->fbc_wm_enabled = INTEL_GEN(dev_priv) >= 6; 2738 2739 /* merge each WM1+ level */ 2740 for (level = 1; level <= max_level; level++) { 2741 struct intel_wm_level *wm = &merged->wm[level]; 2742 2743 ilk_merge_wm_level(dev, level, wm); 2744 2745 if (level > last_enabled_level) 2746 wm->enable = false; 2747 else if (!ilk_validate_wm_level(level, max, wm)) 2748 /* make sure all following levels get disabled */ 2749 last_enabled_level = level - 1; 2750 2751 /* 2752 * The spec says it is preferred to disable 2753 * FBC WMs instead of disabling a WM level. 2754 */ 2755 if (wm->fbc_val > max->fbc) { 2756 if (wm->enable) 2757 merged->fbc_wm_enabled = false; 2758 wm->fbc_val = 0; 2759 } 2760 } 2761 2762 /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */ 2763 /* 2764 * FIXME this is racy. FBC might get enabled later. 2765 * What we should check here is whether FBC can be 2766 * enabled sometime later. 
2767 */ 2768 if (IS_GEN5(dev_priv) && !merged->fbc_wm_enabled && 2769 intel_fbc_is_active(dev_priv)) { 2770 for (level = 2; level <= max_level; level++) { 2771 struct intel_wm_level *wm = &merged->wm[level]; 2772 2773 wm->enable = false; 2774 } 2775 } 2776 } 2777 2778 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm) 2779 { 2780 /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */ 2781 return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable); 2782 } 2783 2784 /* The value we need to program into the WM_LPx latency field */ 2785 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level) 2786 { 2787 struct drm_i915_private *dev_priv = to_i915(dev); 2788 2789 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 2790 return 2 * level; 2791 else 2792 return dev_priv->wm.pri_latency[level]; 2793 } 2794 2795 static void ilk_compute_wm_results(struct drm_device *dev, 2796 const struct intel_pipe_wm *merged, 2797 enum intel_ddb_partitioning partitioning, 2798 struct ilk_wm_values *results) 2799 { 2800 struct drm_i915_private *dev_priv = to_i915(dev); 2801 struct intel_crtc *intel_crtc; 2802 int level, wm_lp; 2803 2804 results->enable_fbc_wm = merged->fbc_wm_enabled; 2805 results->partitioning = partitioning; 2806 2807 /* LP1+ register values */ 2808 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2809 const struct intel_wm_level *r; 2810 2811 level = ilk_wm_lp_to_level(wm_lp, merged); 2812 2813 r = &merged->wm[level]; 2814 2815 /* 2816 * Maintain the watermark values even if the level is 2817 * disabled. Doing otherwise could cause underruns. 2818 */ 2819 results->wm_lp[wm_lp - 1] = 2820 (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) | 2821 (r->pri_val << WM1_LP_SR_SHIFT) | 2822 r->cur_val; 2823 2824 if (r->enable) 2825 results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN; 2826 2827 if (INTEL_GEN(dev_priv) >= 8) 2828 results->wm_lp[wm_lp - 1] |= 2829 r->fbc_val << WM1_LP_FBC_SHIFT_BDW; 2830 else 2831 results->wm_lp[wm_lp - 1] |= 2832 r->fbc_val << WM1_LP_FBC_SHIFT; 2833 2834 /* 2835 * Always set WM1S_LP_EN when spr_val != 0, even if the 2836 * level is disabled. Doing otherwise could cause underruns. 2837 */ 2838 if (INTEL_GEN(dev_priv) <= 6 && r->spr_val) { 2839 WARN_ON(wm_lp != 1); 2840 results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val; 2841 } else 2842 results->wm_lp_spr[wm_lp - 1] = r->spr_val; 2843 } 2844 2845 /* LP0 register values */ 2846 for_each_intel_crtc(dev, intel_crtc) { 2847 enum i915_pipe pipe = intel_crtc->pipe; 2848 const struct intel_wm_level *r = 2849 &intel_crtc->wm.active.ilk.wm[0]; 2850 2851 if (WARN_ON(!r->enable)) 2852 continue; 2853 2854 results->wm_linetime[pipe] = intel_crtc->wm.active.ilk.linetime; 2855 2856 results->wm_pipe[pipe] = 2857 (r->pri_val << WM0_PIPE_PLANE_SHIFT) | 2858 (r->spr_val << WM0_PIPE_SPRITE_SHIFT) | 2859 r->cur_val; 2860 } 2861 } 2862 2863 /* Find the result with the highest level enabled. Check for enable_fbc_wm in 2864 * case both are at the same level. Prefer r1 in case they're the same. 
*/ 2865 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev, 2866 struct intel_pipe_wm *r1, 2867 struct intel_pipe_wm *r2) 2868 { 2869 int level, max_level = ilk_wm_max_level(to_i915(dev)); 2870 int level1 = 0, level2 = 0; 2871 2872 for (level = 1; level <= max_level; level++) { 2873 if (r1->wm[level].enable) 2874 level1 = level; 2875 if (r2->wm[level].enable) 2876 level2 = level; 2877 } 2878 2879 if (level1 == level2) { 2880 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled) 2881 return r2; 2882 else 2883 return r1; 2884 } else if (level1 > level2) { 2885 return r1; 2886 } else { 2887 return r2; 2888 } 2889 } 2890 2891 /* dirty bits used to track which watermarks need changes */ 2892 #define WM_DIRTY_PIPE(pipe) (1 << (pipe)) 2893 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe))) 2894 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp))) 2895 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3)) 2896 #define WM_DIRTY_FBC (1 << 24) 2897 #define WM_DIRTY_DDB (1 << 25) 2898 2899 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv, 2900 const struct ilk_wm_values *old, 2901 const struct ilk_wm_values *new) 2902 { 2903 unsigned int dirty = 0; 2904 enum i915_pipe pipe; 2905 int wm_lp; 2906 2907 for_each_pipe(dev_priv, pipe) { 2908 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) { 2909 dirty |= WM_DIRTY_LINETIME(pipe); 2910 /* Must disable LP1+ watermarks too */ 2911 dirty |= WM_DIRTY_LP_ALL; 2912 } 2913 2914 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) { 2915 dirty |= WM_DIRTY_PIPE(pipe); 2916 /* Must disable LP1+ watermarks too */ 2917 dirty |= WM_DIRTY_LP_ALL; 2918 } 2919 } 2920 2921 if (old->enable_fbc_wm != new->enable_fbc_wm) { 2922 dirty |= WM_DIRTY_FBC; 2923 /* Must disable LP1+ watermarks too */ 2924 dirty |= WM_DIRTY_LP_ALL; 2925 } 2926 2927 if (old->partitioning != new->partitioning) { 2928 dirty |= WM_DIRTY_DDB; 2929 /* Must disable LP1+ watermarks too */ 2930 dirty |= WM_DIRTY_LP_ALL; 2931 } 2932 2933 /* LP1+ watermarks already deemed dirty, no need to continue */ 2934 if (dirty & WM_DIRTY_LP_ALL) 2935 return dirty; 2936 2937 /* Find the lowest numbered LP1+ watermark in need of an update... */ 2938 for (wm_lp = 1; wm_lp <= 3; wm_lp++) { 2939 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] || 2940 old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1]) 2941 break; 2942 } 2943 2944 /* ...and mark it and all higher numbered LP1+ watermarks as dirty */ 2945 for (; wm_lp <= 3; wm_lp++) 2946 dirty |= WM_DIRTY_LP(wm_lp); 2947 2948 return dirty; 2949 } 2950 2951 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv, 2952 unsigned int dirty) 2953 { 2954 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2955 bool changed = false; 2956 2957 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) { 2958 previous->wm_lp[2] &= ~WM1_LP_SR_EN; 2959 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]); 2960 changed = true; 2961 } 2962 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) { 2963 previous->wm_lp[1] &= ~WM1_LP_SR_EN; 2964 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]); 2965 changed = true; 2966 } 2967 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) { 2968 previous->wm_lp[0] &= ~WM1_LP_SR_EN; 2969 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]); 2970 changed = true; 2971 } 2972 2973 /* 2974 * Don't touch WM1S_LP_EN here. 2975 * Doing so could cause underruns. 
2976 */ 2977 2978 return changed; 2979 } 2980 2981 /* 2982 * The spec says we shouldn't write when we don't need, because every write 2983 * causes WMs to be re-evaluated, expending some power. 2984 */ 2985 static void ilk_write_wm_values(struct drm_i915_private *dev_priv, 2986 struct ilk_wm_values *results) 2987 { 2988 struct ilk_wm_values *previous = &dev_priv->wm.hw; 2989 unsigned int dirty; 2990 uint32_t val; 2991 2992 dirty = ilk_compute_wm_dirty(dev_priv, previous, results); 2993 if (!dirty) 2994 return; 2995 2996 _ilk_disable_lp_wm(dev_priv, dirty); 2997 2998 if (dirty & WM_DIRTY_PIPE(PIPE_A)) 2999 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]); 3000 if (dirty & WM_DIRTY_PIPE(PIPE_B)) 3001 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]); 3002 if (dirty & WM_DIRTY_PIPE(PIPE_C)) 3003 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]); 3004 3005 if (dirty & WM_DIRTY_LINETIME(PIPE_A)) 3006 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]); 3007 if (dirty & WM_DIRTY_LINETIME(PIPE_B)) 3008 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]); 3009 if (dirty & WM_DIRTY_LINETIME(PIPE_C)) 3010 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]); 3011 3012 if (dirty & WM_DIRTY_DDB) { 3013 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { 3014 val = I915_READ(WM_MISC); 3015 if (results->partitioning == INTEL_DDB_PART_1_2) 3016 val &= ~WM_MISC_DATA_PARTITION_5_6; 3017 else 3018 val |= WM_MISC_DATA_PARTITION_5_6; 3019 I915_WRITE(WM_MISC, val); 3020 } else { 3021 val = I915_READ(DISP_ARB_CTL2); 3022 if (results->partitioning == INTEL_DDB_PART_1_2) 3023 val &= ~DISP_DATA_PARTITION_5_6; 3024 else 3025 val |= DISP_DATA_PARTITION_5_6; 3026 I915_WRITE(DISP_ARB_CTL2, val); 3027 } 3028 } 3029 3030 if (dirty & WM_DIRTY_FBC) { 3031 val = I915_READ(DISP_ARB_CTL); 3032 if (results->enable_fbc_wm) 3033 val &= ~DISP_FBC_WM_DIS; 3034 else 3035 val |= DISP_FBC_WM_DIS; 3036 I915_WRITE(DISP_ARB_CTL, val); 3037 } 3038 3039 if (dirty & WM_DIRTY_LP(1) && 3040 previous->wm_lp_spr[0] != results->wm_lp_spr[0]) 3041 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); 3042 3043 if (INTEL_GEN(dev_priv) >= 7) { 3044 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1]) 3045 I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]); 3046 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2]) 3047 I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]); 3048 } 3049 3050 if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0]) 3051 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]); 3052 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1]) 3053 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]); 3054 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2]) 3055 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]); 3056 3057 dev_priv->wm.hw = *results; 3058 } 3059 3060 bool ilk_disable_lp_wm(struct drm_device *dev) 3061 { 3062 struct drm_i915_private *dev_priv = to_i915(dev); 3063 3064 return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); 3065 } 3066 3067 #define SKL_SAGV_BLOCK_TIME 30 /* µs */ 3068 3069 /* 3070 * FIXME: We still don't have the proper code detect if we need to apply the WA, 3071 * so assume we'll always need it in order to avoid underruns. 
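 * When the WA is in effect the callers below add 15 us of extra latency for
 * X-tiled framebuffers (see intel_can_enable_sagv() and
 * skl_compute_plane_wm()).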
3072 */ 3073 static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) 3074 { 3075 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 3076 3077 if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) 3078 return true; 3079 3080 return false; 3081 } 3082 3083 static bool 3084 intel_has_sagv(struct drm_i915_private *dev_priv) 3085 { 3086 if (IS_KABYLAKE(dev_priv)) 3087 return true; 3088 3089 if (IS_SKYLAKE(dev_priv) && 3090 dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED) 3091 return true; 3092 3093 return false; 3094 } 3095 3096 /* 3097 * SAGV dynamically adjusts the system agent voltage and clock frequencies 3098 * depending on power and performance requirements. The display engine access 3099 * to system memory is blocked during the adjustment time. Because of the 3100 * blocking time, having this enabled can cause full system hangs and/or pipe 3101 * underruns if we don't meet all of the following requirements: 3102 * 3103 * - <= 1 pipe enabled 3104 * - All planes can enable watermarks for latencies >= SAGV engine block time 3105 * - We're not using an interlaced display configuration 3106 */ 3107 int 3108 intel_enable_sagv(struct drm_i915_private *dev_priv) 3109 { 3110 int ret; 3111 3112 if (!intel_has_sagv(dev_priv)) 3113 return 0; 3114 3115 if (dev_priv->sagv_status == I915_SAGV_ENABLED) 3116 return 0; 3117 3118 DRM_DEBUG_KMS("Enabling the SAGV\n"); 3119 mutex_lock(&dev_priv->rps.hw_lock); 3120 3121 ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3122 GEN9_SAGV_ENABLE); 3123 3124 /* We don't need to wait for the SAGV when enabling */ 3125 mutex_unlock(&dev_priv->rps.hw_lock); 3126 3127 /* 3128 * Some skl systems, pre-release machines in particular, 3129 * don't actually have an SAGV. 3130 */ 3131 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { 3132 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 3133 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 3134 return 0; 3135 } else if (ret < 0) { 3136 DRM_ERROR("Failed to enable the SAGV\n"); 3137 return ret; 3138 } 3139 3140 dev_priv->sagv_status = I915_SAGV_ENABLED; 3141 return 0; 3142 } 3143 3144 int 3145 intel_disable_sagv(struct drm_i915_private *dev_priv) 3146 { 3147 int ret; 3148 3149 if (!intel_has_sagv(dev_priv)) 3150 return 0; 3151 3152 if (dev_priv->sagv_status == I915_SAGV_DISABLED) 3153 return 0; 3154 3155 DRM_DEBUG_KMS("Disabling the SAGV\n"); 3156 mutex_lock(&dev_priv->rps.hw_lock); 3157 3158 /* bspec says to keep retrying for at least 1 ms */ 3159 ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL, 3160 GEN9_SAGV_DISABLE, 3161 GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED, 3162 1); 3163 mutex_unlock(&dev_priv->rps.hw_lock); 3164 3165 /* 3166 * Some skl systems, pre-release machines in particular, 3167 * don't actually have an SAGV. 
3168 */ 3169 if (IS_SKYLAKE(dev_priv) && ret == -ENXIO) { 3170 DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); 3171 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 3172 return 0; 3173 } else if (ret < 0) { 3174 DRM_ERROR("Failed to disable the SAGV (%d)\n", ret); 3175 return ret; 3176 } 3177 3178 dev_priv->sagv_status = I915_SAGV_DISABLED; 3179 return 0; 3180 } 3181 3182 bool intel_can_enable_sagv(struct drm_atomic_state *state) 3183 { 3184 struct drm_device *dev = state->dev; 3185 struct drm_i915_private *dev_priv = to_i915(dev); 3186 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3187 struct intel_crtc *crtc; 3188 struct intel_plane *plane; 3189 struct intel_crtc_state *cstate; 3190 enum i915_pipe pipe; 3191 int level, latency; 3192 3193 if (!intel_has_sagv(dev_priv)) 3194 return false; 3195 3196 /* 3197 * SKL workaround: bspec recommends we disable the SAGV when we have 3198 * more then one pipe enabled 3199 * 3200 * If there are no active CRTCs, no additional checks need be performed 3201 */ 3202 if (hweight32(intel_state->active_crtcs) == 0) 3203 return true; 3204 else if (hweight32(intel_state->active_crtcs) > 1) 3205 return false; 3206 3207 /* Since we're now guaranteed to only have one active CRTC... */ 3208 pipe = ffs(intel_state->active_crtcs) - 1; 3209 crtc = intel_get_crtc_for_pipe(dev_priv, pipe); 3210 cstate = to_intel_crtc_state(crtc->base.state); 3211 3212 if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) 3213 return false; 3214 3215 for_each_intel_plane_on_crtc(dev, crtc, plane) { 3216 struct skl_plane_wm *wm = 3217 &cstate->wm.skl.optimal.planes[plane->id]; 3218 3219 /* Skip this plane if it's not enabled */ 3220 if (!wm->wm[0].plane_en) 3221 continue; 3222 3223 /* Find the highest enabled wm level for this plane */ 3224 for (level = ilk_wm_max_level(dev_priv); 3225 !wm->wm[level].plane_en; --level) 3226 { } 3227 3228 latency = dev_priv->wm.skl_latency[level]; 3229 3230 if (skl_needs_memory_bw_wa(intel_state) && 3231 plane->base.state->fb->modifier == 3232 I915_FORMAT_MOD_X_TILED) 3233 latency += 15; 3234 3235 /* 3236 * If any of the planes on this pipe don't enable wm levels 3237 * that incur memory latencies higher then 30µs we can't enable 3238 * the SAGV 3239 */ 3240 if (latency < SKL_SAGV_BLOCK_TIME) 3241 return false; 3242 } 3243 3244 return true; 3245 } 3246 3247 static void 3248 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, 3249 const struct intel_crtc_state *cstate, 3250 struct skl_ddb_entry *alloc, /* out */ 3251 int *num_active /* out */) 3252 { 3253 struct drm_atomic_state *state = cstate->base.state; 3254 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 3255 struct drm_i915_private *dev_priv = to_i915(dev); 3256 struct drm_crtc *for_crtc = cstate->base.crtc; 3257 unsigned int pipe_size, ddb_size; 3258 int nth_active_pipe; 3259 3260 if (WARN_ON(!state) || !cstate->base.active) { 3261 alloc->start = 0; 3262 alloc->end = 0; 3263 *num_active = hweight32(dev_priv->active_crtcs); 3264 return; 3265 } 3266 3267 if (intel_state->active_pipe_changes) 3268 *num_active = hweight32(intel_state->active_crtcs); 3269 else 3270 *num_active = hweight32(dev_priv->active_crtcs); 3271 3272 ddb_size = INTEL_INFO(dev_priv)->ddb_size; 3273 WARN_ON(ddb_size == 0); 3274 3275 ddb_size -= 4; /* 4 blocks for bypass path allocation */ 3276 3277 /* 3278 * If the state doesn't change the active CRTC's, then there's 3279 * no need to recalculate; the existing pipe allocation limits 3280 * should remain 
unchanged. Note that we're safe from racing 3281 * commits since any racing commit that changes the active CRTC 3282 * list would need to grab _all_ crtc locks, including the one 3283 * we currently hold. 3284 */ 3285 if (!intel_state->active_pipe_changes) { 3286 /* 3287 * alloc may be cleared by clear_intel_crtc_state, 3288 * copy from old state to be sure 3289 */ 3290 *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb; 3291 return; 3292 } 3293 3294 nth_active_pipe = hweight32(intel_state->active_crtcs & 3295 (drm_crtc_mask(for_crtc) - 1)); 3296 pipe_size = ddb_size / hweight32(intel_state->active_crtcs); 3297 alloc->start = nth_active_pipe * ddb_size / *num_active; 3298 alloc->end = alloc->start + pipe_size; 3299 } 3300 3301 static unsigned int skl_cursor_allocation(int num_active) 3302 { 3303 if (num_active == 1) 3304 return 32; 3305 3306 return 8; 3307 } 3308 3309 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) 3310 { 3311 entry->start = reg & 0x3ff; 3312 entry->end = (reg >> 16) & 0x3ff; 3313 if (entry->end) 3314 entry->end += 1; 3315 } 3316 3317 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, 3318 struct skl_ddb_allocation *ddb /* out */) 3319 { 3320 struct intel_crtc *crtc; 3321 3322 memset(ddb, 0, sizeof(*ddb)); 3323 3324 for_each_intel_crtc(&dev_priv->drm, crtc) { 3325 enum intel_display_power_domain power_domain; 3326 enum plane_id plane_id; 3327 enum i915_pipe pipe = crtc->pipe; 3328 3329 power_domain = POWER_DOMAIN_PIPE(pipe); 3330 if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) 3331 continue; 3332 3333 for_each_plane_id_on_crtc(crtc, plane_id) { 3334 u32 val; 3335 3336 if (plane_id != PLANE_CURSOR) 3337 val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); 3338 else 3339 val = I915_READ(CUR_BUF_CFG(pipe)); 3340 3341 skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); 3342 } 3343 3344 intel_display_power_put(dev_priv, power_domain); 3345 } 3346 } 3347 3348 /* 3349 * Determines the downscale amount of a plane for the purposes of watermark calculations. 3350 * The bspec defines downscale amount as: 3351 * 3352 * """ 3353 * Horizontal down scale amount = maximum[1, Horizontal source size / 3354 * Horizontal destination size] 3355 * Vertical down scale amount = maximum[1, Vertical source size / 3356 * Vertical destination size] 3357 * Total down scale amount = Horizontal down scale amount * 3358 * Vertical down scale amount 3359 * """ 3360 * 3361 * Return value is provided in 16.16 fixed point form to retain fractional part. 3362 * Caller should take care of dividing & rounding off the value. 3363 */ 3364 static uint32_t 3365 skl_plane_downscale_amount(const struct intel_crtc_state *cstate, 3366 const struct intel_plane_state *pstate) 3367 { 3368 struct intel_plane *plane = to_intel_plane(pstate->base.plane); 3369 uint32_t downscale_h, downscale_w; 3370 uint32_t src_w, src_h, dst_w, dst_h; 3371 3372 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3373 return DRM_PLANE_HELPER_NO_SCALING; 3374 3375 /* n.b., src is 16.16 fixed point, dst is whole integer */ 3376 if (plane->id == PLANE_CURSOR) { 3377 /* 3378 * Cursors only support 0/180 degree rotation, 3379 * hence no need to account for rotation here. 
3380 */ 3381 src_w = pstate->base.src_w; 3382 src_h = pstate->base.src_h; 3383 dst_w = pstate->base.crtc_w; 3384 dst_h = pstate->base.crtc_h; 3385 } else { 3386 /* 3387 * Src coordinates are already rotated by 270 degrees for 3388 * the 90/270 degree plane rotation cases (to match the 3389 * GTT mapping), hence no need to account for rotation here. 3390 */ 3391 src_w = drm_rect_width(&pstate->base.src); 3392 src_h = drm_rect_height(&pstate->base.src); 3393 dst_w = drm_rect_width(&pstate->base.dst); 3394 dst_h = drm_rect_height(&pstate->base.dst); 3395 } 3396 3397 downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3398 downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING); 3399 3400 /* Provide result in 16.16 fixed point */ 3401 return (uint64_t)downscale_w * downscale_h >> 16; 3402 } 3403 3404 static unsigned int 3405 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, 3406 const struct drm_plane_state *pstate, 3407 int y) 3408 { 3409 struct intel_plane *plane = to_intel_plane(pstate->plane); 3410 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3411 uint32_t down_scale_amount, data_rate; 3412 uint32_t width = 0, height = 0; 3413 struct drm_framebuffer *fb; 3414 u32 format; 3415 3416 if (!intel_pstate->base.visible) 3417 return 0; 3418 3419 fb = pstate->fb; 3420 format = fb->format->format; 3421 3422 if (plane->id == PLANE_CURSOR) 3423 return 0; 3424 if (y && format != DRM_FORMAT_NV12) 3425 return 0; 3426 3427 /* 3428 * Src coordinates are already rotated by 270 degrees for 3429 * the 90/270 degree plane rotation cases (to match the 3430 * GTT mapping), hence no need to account for rotation here. 3431 */ 3432 width = drm_rect_width(&intel_pstate->base.src) >> 16; 3433 height = drm_rect_height(&intel_pstate->base.src) >> 16; 3434 3435 /* for planar format */ 3436 if (format == DRM_FORMAT_NV12) { 3437 if (y) /* y-plane data rate */ 3438 data_rate = width * height * 3439 fb->format->cpp[0]; 3440 else /* uv-plane data rate */ 3441 data_rate = (width / 2) * (height / 2) * 3442 fb->format->cpp[1]; 3443 } else { 3444 /* for packed formats */ 3445 data_rate = width * height * fb->format->cpp[0]; 3446 } 3447 3448 down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); 3449 3450 return (uint64_t)data_rate * down_scale_amount >> 16; 3451 } 3452 3453 /* 3454 * We don't overflow 32 bits. 
Worst case is 3 planes enabled, each fetching 3455 * a 8192x4096@32bpp framebuffer: 3456 * 3 * 4096 * 8192 * 4 < 2^32 3457 */ 3458 static unsigned int 3459 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, 3460 unsigned *plane_data_rate, 3461 unsigned *plane_y_data_rate) 3462 { 3463 struct drm_crtc_state *cstate = &intel_cstate->base; 3464 struct drm_atomic_state *state = cstate->state; 3465 struct drm_plane *plane; 3466 const struct drm_plane_state *pstate; 3467 unsigned int total_data_rate = 0; 3468 3469 if (WARN_ON(!state)) 3470 return 0; 3471 3472 /* Calculate and cache data rate for each plane */ 3473 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) { 3474 enum plane_id plane_id = to_intel_plane(plane)->id; 3475 unsigned int rate; 3476 3477 /* packed/uv */ 3478 rate = skl_plane_relative_data_rate(intel_cstate, 3479 pstate, 0); 3480 plane_data_rate[plane_id] = rate; 3481 3482 total_data_rate += rate; 3483 3484 /* y-plane */ 3485 rate = skl_plane_relative_data_rate(intel_cstate, 3486 pstate, 1); 3487 plane_y_data_rate[plane_id] = rate; 3488 3489 total_data_rate += rate; 3490 } 3491 3492 return total_data_rate; 3493 } 3494 3495 static uint16_t 3496 skl_ddb_min_alloc(const struct drm_plane_state *pstate, 3497 const int y) 3498 { 3499 struct drm_framebuffer *fb = pstate->fb; 3500 struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); 3501 uint32_t src_w, src_h; 3502 uint32_t min_scanlines = 8; 3503 uint8_t plane_bpp; 3504 3505 if (WARN_ON(!fb)) 3506 return 0; 3507 3508 /* For packed formats, no y-plane, return 0 */ 3509 if (y && fb->format->format != DRM_FORMAT_NV12) 3510 return 0; 3511 3512 /* For Non Y-tile return 8-blocks */ 3513 if (fb->modifier != I915_FORMAT_MOD_Y_TILED && 3514 fb->modifier != I915_FORMAT_MOD_Yf_TILED) 3515 return 8; 3516 3517 /* 3518 * Src coordinates are already rotated by 270 degrees for 3519 * the 90/270 degree plane rotation cases (to match the 3520 * GTT mapping), hence no need to account for rotation here. 
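 * Illustrative example (values assumed): a Y-tiled, unrotated plane, 1920
 * pixels wide with cpp = 4: DIV_ROUND_UP(4 * 1920 * 4, 512) = 60, and
 * 60 * 8 / 4 + 3 = 123 minimum blocks.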
3521 */ 3522 src_w = drm_rect_width(&intel_pstate->base.src) >> 16; 3523 src_h = drm_rect_height(&intel_pstate->base.src) >> 16; 3524 3525 /* Halve UV plane width and height for NV12 */ 3526 if (fb->format->format == DRM_FORMAT_NV12 && !y) { 3527 src_w /= 2; 3528 src_h /= 2; 3529 } 3530 3531 if (fb->format->format == DRM_FORMAT_NV12 && !y) 3532 plane_bpp = fb->format->cpp[1]; 3533 else 3534 plane_bpp = fb->format->cpp[0]; 3535 3536 if (drm_rotation_90_or_270(pstate->rotation)) { 3537 switch (plane_bpp) { 3538 case 1: 3539 min_scanlines = 32; 3540 break; 3541 case 2: 3542 min_scanlines = 16; 3543 break; 3544 case 4: 3545 min_scanlines = 8; 3546 break; 3547 case 8: 3548 min_scanlines = 4; 3549 break; 3550 default: 3551 WARN(1, "Unsupported pixel depth %u for rotation", 3552 plane_bpp); 3553 min_scanlines = 32; 3554 } 3555 } 3556 3557 return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3; 3558 } 3559 3560 static void 3561 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, 3562 uint16_t *minimum, uint16_t *y_minimum) 3563 { 3564 const struct drm_plane_state *pstate; 3565 struct drm_plane *plane; 3566 3567 drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { 3568 enum plane_id plane_id = to_intel_plane(plane)->id; 3569 3570 if (plane_id == PLANE_CURSOR) 3571 continue; 3572 3573 if (!pstate->visible) 3574 continue; 3575 3576 minimum[plane_id] = skl_ddb_min_alloc(pstate, 0); 3577 y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); 3578 } 3579 3580 minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); 3581 } 3582 3583 static int 3584 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, 3585 struct skl_ddb_allocation *ddb /* out */) 3586 { 3587 struct drm_atomic_state *state = cstate->base.state; 3588 struct drm_crtc *crtc = cstate->base.crtc; 3589 struct drm_device *dev = crtc->dev; 3590 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 3591 enum i915_pipe pipe = intel_crtc->pipe; 3592 struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; 3593 uint16_t alloc_size, start; 3594 uint16_t minimum[I915_MAX_PLANES] = {}; 3595 uint16_t y_minimum[I915_MAX_PLANES] = {}; 3596 unsigned int total_data_rate; 3597 enum plane_id plane_id; 3598 int num_active; 3599 unsigned plane_data_rate[I915_MAX_PLANES] = {}; 3600 unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; 3601 3602 /* Clear the partitioning for disabled planes. */ 3603 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 3604 memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); 3605 3606 if (WARN_ON(!state)) 3607 return 0; 3608 3609 if (!cstate->base.active) { 3610 alloc->start = alloc->end = 0; 3611 return 0; 3612 } 3613 3614 skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); 3615 alloc_size = skl_ddb_entry_size(alloc); 3616 if (alloc_size == 0) { 3617 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); 3618 return 0; 3619 } 3620 3621 skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); 3622 3623 /* 3624 * 1. Allocate the mininum required blocks for each active plane 3625 * and allocate the cursor, it doesn't require extra allocation 3626 * proportional to the data rate. 3627 */ 3628 3629 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 3630 alloc_size -= minimum[plane_id]; 3631 alloc_size -= y_minimum[plane_id]; 3632 } 3633 3634 ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; 3635 ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; 3636 3637 /* 3638 * 2. 
Distribute the remaining space in proportion to the amount of 3639 * data each plane needs to fetch from memory. 3640 * 3641 * FIXME: we may not allocate every single block here. 3642 */ 3643 total_data_rate = skl_get_total_relative_data_rate(cstate, 3644 plane_data_rate, 3645 plane_y_data_rate); 3646 if (total_data_rate == 0) 3647 return 0; 3648 3649 start = alloc->start; 3650 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 3651 unsigned int data_rate, y_data_rate; 3652 uint16_t plane_blocks, y_plane_blocks = 0; 3653 3654 if (plane_id == PLANE_CURSOR) 3655 continue; 3656 3657 data_rate = plane_data_rate[plane_id]; 3658 3659 /* 3660 * allocation for (packed formats) or (uv-plane part of planar format): 3661 * promote the expression to 64 bits to avoid overflowing, the 3662 * result is < available as data_rate / total_data_rate < 1 3663 */ 3664 plane_blocks = minimum[plane_id]; 3665 plane_blocks += div_u64((uint64_t)alloc_size * data_rate, 3666 total_data_rate); 3667 3668 /* Leave disabled planes at (0,0) */ 3669 if (data_rate) { 3670 ddb->plane[pipe][plane_id].start = start; 3671 ddb->plane[pipe][plane_id].end = start + plane_blocks; 3672 } 3673 3674 start += plane_blocks; 3675 3676 /* 3677 * allocation for y_plane part of planar format: 3678 */ 3679 y_data_rate = plane_y_data_rate[plane_id]; 3680 3681 y_plane_blocks = y_minimum[plane_id]; 3682 y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, 3683 total_data_rate); 3684 3685 if (y_data_rate) { 3686 ddb->y_plane[pipe][plane_id].start = start; 3687 ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; 3688 } 3689 3690 start += y_plane_blocks; 3691 } 3692 3693 return 0; 3694 } 3695 3696 /* 3697 * The max latency should be 257 (max the punit can code is 255 and we add 2us 3698 * for the read latency) and cpp should always be <= 8, so that 3699 * should allow pixel_rate up to ~2 GHz which seems sufficient since max 3700 * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 3701 */ 3702 static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, 3703 uint32_t latency) 3704 { 3705 uint32_t wm_intermediate_val; 3706 uint_fixed_16_16_t ret; 3707 3708 if (latency == 0) 3709 return FP_16_16_MAX; 3710 3711 wm_intermediate_val = latency * pixel_rate * cpp; 3712 ret = fixed_16_16_div_round_up_u64(wm_intermediate_val, 1000 * 512); 3713 return ret; 3714 } 3715 3716 static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, 3717 uint32_t pipe_htotal, 3718 uint32_t latency, 3719 uint_fixed_16_16_t plane_blocks_per_line) 3720 { 3721 uint32_t wm_intermediate_val; 3722 uint_fixed_16_16_t ret; 3723 3724 if (latency == 0) 3725 return FP_16_16_MAX; 3726 3727 wm_intermediate_val = latency * pixel_rate; 3728 wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val, 3729 pipe_htotal * 1000); 3730 ret = mul_u32_fixed_16_16(wm_intermediate_val, plane_blocks_per_line); 3731 return ret; 3732 } 3733 3734 static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate, 3735 struct intel_plane_state *pstate) 3736 { 3737 uint64_t adjusted_pixel_rate; 3738 uint64_t downscale_amount; 3739 uint64_t pixel_rate; 3740 3741 /* Shouldn't reach here on disabled planes... */ 3742 if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) 3743 return 0; 3744 3745 /* 3746 * Adjusted plane pixel rate is just the pipe's adjusted pixel rate 3747 * with additional adjustments for plane-specific scaling. 
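	 *
	 * Worked example (hypothetical values, treating downscale_amount
	 * as a 16.16 fixed point ratio): a pipe pixel rate of 150000 kHz
	 * combined with a 1.5x plane downscale (0x18000) gives
	 *   150000 * 0x18000 >> 16 = 225000,
	 * i.e. the plane has to fetch data 1.5 times faster than the pipe
	 * rate alone would suggest.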
3748 */ 3749 adjusted_pixel_rate = cstate->pixel_rate; 3750 downscale_amount = skl_plane_downscale_amount(cstate, pstate); 3751 3752 pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; 3753 WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0)); 3754 3755 return pixel_rate; 3756 } 3757 3758 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, 3759 struct intel_crtc_state *cstate, 3760 struct intel_plane_state *intel_pstate, 3761 uint16_t ddb_allocation, 3762 int level, 3763 uint16_t *out_blocks, /* out */ 3764 uint8_t *out_lines, /* out */ 3765 bool *enabled /* out */) 3766 { 3767 struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); 3768 struct drm_plane_state *pstate = &intel_pstate->base; 3769 struct drm_framebuffer *fb = pstate->fb; 3770 uint32_t latency = dev_priv->wm.skl_latency[level]; 3771 uint_fixed_16_16_t method1, method2; 3772 uint_fixed_16_16_t plane_blocks_per_line; 3773 uint_fixed_16_16_t selected_result; 3774 uint32_t interm_pbpl; 3775 uint32_t plane_bytes_per_line; 3776 uint32_t res_blocks, res_lines; 3777 uint8_t cpp; 3778 uint32_t width = 0, height = 0; 3779 uint32_t plane_pixel_rate; 3780 uint_fixed_16_16_t y_tile_minimum; 3781 uint32_t y_min_scanlines; 3782 struct intel_atomic_state *state = 3783 to_intel_atomic_state(cstate->base.state); 3784 bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state); 3785 bool y_tiled, x_tiled; 3786 3787 if (latency == 0 || 3788 !intel_wm_plane_visible(cstate, intel_pstate)) { 3789 *enabled = false; 3790 return 0; 3791 } 3792 3793 y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || 3794 fb->modifier == I915_FORMAT_MOD_Yf_TILED; 3795 x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; 3796 3797 /* Display WA #1141: kbl. */ 3798 if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled) 3799 latency += 4; 3800 3801 if (apply_memory_bw_wa && x_tiled) 3802 latency += 15; 3803 3804 if (plane->id == PLANE_CURSOR) { 3805 width = intel_pstate->base.crtc_w; 3806 height = intel_pstate->base.crtc_h; 3807 } else { 3808 /* 3809 * Src coordinates are already rotated by 270 degrees for 3810 * the 90/270 degree plane rotation cases (to match the 3811 * GTT mapping), hence no need to account for rotation here. 3812 */ 3813 width = drm_rect_width(&intel_pstate->base.src) >> 16; 3814 height = drm_rect_height(&intel_pstate->base.src) >> 16; 3815 } 3816 3817 cpp = fb->format->cpp[0]; 3818 plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); 3819 3820 if (drm_rotation_90_or_270(pstate->rotation)) { 3821 int cpp = (fb->format->format == DRM_FORMAT_NV12) ? 
3822 fb->format->cpp[1] : 3823 fb->format->cpp[0]; 3824 3825 switch (cpp) { 3826 case 1: 3827 y_min_scanlines = 16; 3828 break; 3829 case 2: 3830 y_min_scanlines = 8; 3831 break; 3832 case 4: 3833 y_min_scanlines = 4; 3834 break; 3835 default: 3836 MISSING_CASE(cpp); 3837 return -EINVAL; 3838 } 3839 } else { 3840 y_min_scanlines = 4; 3841 } 3842 3843 if (apply_memory_bw_wa) 3844 y_min_scanlines *= 2; 3845 3846 plane_bytes_per_line = width * cpp; 3847 if (y_tiled) { 3848 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * 3849 y_min_scanlines, 512); 3850 plane_blocks_per_line = 3851 fixed_16_16_div_round_up(interm_pbpl, y_min_scanlines); 3852 } else if (x_tiled) { 3853 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); 3854 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); 3855 } else { 3856 interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; 3857 plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); 3858 } 3859 3860 method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); 3861 method2 = skl_wm_method2(plane_pixel_rate, 3862 cstate->base.adjusted_mode.crtc_htotal, 3863 latency, 3864 plane_blocks_per_line); 3865 3866 y_tile_minimum = mul_u32_fixed_16_16(y_min_scanlines, 3867 plane_blocks_per_line); 3868 3869 if (y_tiled) { 3870 selected_result = max_fixed_16_16(method2, y_tile_minimum); 3871 } else { 3872 if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && 3873 (plane_bytes_per_line / 512 < 1)) 3874 selected_result = method2; 3875 else if ((ddb_allocation / 3876 fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) 3877 selected_result = min_fixed_16_16(method1, method2); 3878 else 3879 selected_result = method1; 3880 } 3881 3882 res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; 3883 res_lines = DIV_ROUND_UP(selected_result.val, 3884 plane_blocks_per_line.val); 3885 3886 if (level >= 1 && level <= 7) { 3887 if (y_tiled) { 3888 res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum); 3889 res_lines += y_min_scanlines; 3890 } else { 3891 res_blocks++; 3892 } 3893 } 3894 3895 if (res_blocks >= ddb_allocation || res_lines > 31) { 3896 *enabled = false; 3897 3898 /* 3899 * If there are no valid level 0 watermarks, then we can't 3900 * support this display configuration. 
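		 *
		 * Example with made-up numbers: a plane whose level 0
		 * result needs 200 blocks against a 160 block DDB
		 * allocation (or more than 31 lines) fails the atomic
		 * check with -EINVAL below, whereas the same overrun at
		 * level 1 or above merely leaves that level disabled.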
3901 */ 3902 if (level) { 3903 return 0; 3904 } else { 3905 struct drm_plane *plane = pstate->plane; 3906 3907 DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n"); 3908 DRM_DEBUG_KMS("[PLANE:%d:%s] blocks required = %u/%u, lines required = %u/31\n", 3909 plane->base.id, plane->name, 3910 res_blocks, ddb_allocation, res_lines); 3911 return -EINVAL; 3912 } 3913 } 3914 3915 *out_blocks = res_blocks; 3916 *out_lines = res_lines; 3917 *enabled = true; 3918 3919 return 0; 3920 } 3921 3922 static int 3923 skl_compute_wm_level(const struct drm_i915_private *dev_priv, 3924 struct skl_ddb_allocation *ddb, 3925 struct intel_crtc_state *cstate, 3926 struct intel_plane *intel_plane, 3927 int level, 3928 struct skl_wm_level *result) 3929 { 3930 struct drm_atomic_state *state = cstate->base.state; 3931 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 3932 struct drm_plane *plane = &intel_plane->base; 3933 struct intel_plane_state *intel_pstate = NULL; 3934 uint16_t ddb_blocks; 3935 enum i915_pipe pipe = intel_crtc->pipe; 3936 int ret; 3937 3938 if (state) 3939 intel_pstate = 3940 intel_atomic_get_existing_plane_state(state, 3941 intel_plane); 3942 3943 /* 3944 * Note: If we start supporting multiple pending atomic commits against 3945 * the same planes/CRTC's in the future, plane->state will no longer be 3946 * the correct pre-state to use for the calculations here and we'll 3947 * need to change where we get the 'unchanged' plane data from. 3948 * 3949 * For now this is fine because we only allow one queued commit against 3950 * a CRTC. Even if the plane isn't modified by this transaction and we 3951 * don't have a plane lock, we still have the CRTC's lock, so we know 3952 * that no other transactions are racing with us to update it. 3953 */ 3954 if (!intel_pstate) 3955 intel_pstate = to_intel_plane_state(plane->state); 3956 3957 WARN_ON(!intel_pstate->base.fb); 3958 3959 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); 3960 3961 ret = skl_compute_plane_wm(dev_priv, 3962 cstate, 3963 intel_pstate, 3964 ddb_blocks, 3965 level, 3966 &result->plane_res_b, 3967 &result->plane_res_l, 3968 &result->plane_en); 3969 if (ret) 3970 return ret; 3971 3972 return 0; 3973 } 3974 3975 static uint32_t 3976 skl_compute_linetime_wm(struct intel_crtc_state *cstate) 3977 { 3978 struct drm_atomic_state *state = cstate->base.state; 3979 struct drm_i915_private *dev_priv = to_i915(state->dev); 3980 uint32_t pixel_rate; 3981 uint32_t linetime_wm; 3982 3983 if (!cstate->base.active) 3984 return 0; 3985 3986 pixel_rate = cstate->pixel_rate; 3987 3988 if (WARN_ON(pixel_rate == 0)) 3989 return 0; 3990 3991 linetime_wm = DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 3992 1000, pixel_rate); 3993 3994 /* Display WA #1135: bxt. 
*/ 3995 if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) 3996 linetime_wm = DIV_ROUND_UP(linetime_wm, 2); 3997 3998 return linetime_wm; 3999 } 4000 4001 static void skl_compute_transition_wm(struct intel_crtc_state *cstate, 4002 struct skl_wm_level *trans_wm /* out */) 4003 { 4004 if (!cstate->base.active) 4005 return; 4006 4007 /* Until we know more, just disable transition WMs */ 4008 trans_wm->plane_en = false; 4009 } 4010 4011 static int skl_build_pipe_wm(struct intel_crtc_state *cstate, 4012 struct skl_ddb_allocation *ddb, 4013 struct skl_pipe_wm *pipe_wm) 4014 { 4015 struct drm_device *dev = cstate->base.crtc->dev; 4016 const struct drm_i915_private *dev_priv = to_i915(dev); 4017 struct intel_plane *intel_plane; 4018 struct skl_plane_wm *wm; 4019 int level, max_level = ilk_wm_max_level(dev_priv); 4020 int ret; 4021 4022 /* 4023 * We'll only calculate watermarks for planes that are actually 4024 * enabled, so make sure all other planes are set as disabled. 4025 */ 4026 memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); 4027 4028 for_each_intel_plane_mask(&dev_priv->drm, 4029 intel_plane, 4030 cstate->base.plane_mask) { 4031 wm = &pipe_wm->planes[intel_plane->id]; 4032 4033 for (level = 0; level <= max_level; level++) { 4034 ret = skl_compute_wm_level(dev_priv, ddb, cstate, 4035 intel_plane, level, 4036 &wm->wm[level]); 4037 if (ret) 4038 return ret; 4039 } 4040 skl_compute_transition_wm(cstate, &wm->trans_wm); 4041 } 4042 pipe_wm->linetime = skl_compute_linetime_wm(cstate); 4043 4044 return 0; 4045 } 4046 4047 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, 4048 i915_reg_t reg, 4049 const struct skl_ddb_entry *entry) 4050 { 4051 if (entry->end) 4052 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start); 4053 else 4054 I915_WRITE(reg, 0); 4055 } 4056 4057 static void skl_write_wm_level(struct drm_i915_private *dev_priv, 4058 i915_reg_t reg, 4059 const struct skl_wm_level *level) 4060 { 4061 uint32_t val = 0; 4062 4063 if (level->plane_en) { 4064 val |= PLANE_WM_EN; 4065 val |= level->plane_res_b; 4066 val |= level->plane_res_l << PLANE_WM_LINES_SHIFT; 4067 } 4068 4069 I915_WRITE(reg, val); 4070 } 4071 4072 static void skl_write_plane_wm(struct intel_crtc *intel_crtc, 4073 const struct skl_plane_wm *wm, 4074 const struct skl_ddb_allocation *ddb, 4075 enum plane_id plane_id) 4076 { 4077 struct drm_crtc *crtc = &intel_crtc->base; 4078 struct drm_device *dev = crtc->dev; 4079 struct drm_i915_private *dev_priv = to_i915(dev); 4080 int level, max_level = ilk_wm_max_level(dev_priv); 4081 enum i915_pipe pipe = intel_crtc->pipe; 4082 4083 for (level = 0; level <= max_level; level++) { 4084 skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), 4085 &wm->wm[level]); 4086 } 4087 skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), 4088 &wm->trans_wm); 4089 4090 skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), 4091 &ddb->plane[pipe][plane_id]); 4092 skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id), 4093 &ddb->y_plane[pipe][plane_id]); 4094 } 4095 4096 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc, 4097 const struct skl_plane_wm *wm, 4098 const struct skl_ddb_allocation *ddb) 4099 { 4100 struct drm_crtc *crtc = &intel_crtc->base; 4101 struct drm_device *dev = crtc->dev; 4102 struct drm_i915_private *dev_priv = to_i915(dev); 4103 int level, max_level = ilk_wm_max_level(dev_priv); 4104 enum i915_pipe pipe = intel_crtc->pipe; 4105 4106 for (level = 0; level <= max_level; level++) { 4107 skl_write_wm_level(dev_priv, 
CUR_WM(pipe, level), 4108 &wm->wm[level]); 4109 } 4110 skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); 4111 4112 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), 4113 &ddb->plane[pipe][PLANE_CURSOR]); 4114 } 4115 4116 bool skl_wm_level_equals(const struct skl_wm_level *l1, 4117 const struct skl_wm_level *l2) 4118 { 4119 if (l1->plane_en != l2->plane_en) 4120 return false; 4121 4122 /* If both planes aren't enabled, the rest shouldn't matter */ 4123 if (!l1->plane_en) 4124 return true; 4125 4126 return (l1->plane_res_l == l2->plane_res_l && 4127 l1->plane_res_b == l2->plane_res_b); 4128 } 4129 4130 static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, 4131 const struct skl_ddb_entry *b) 4132 { 4133 return a->start < b->end && b->start < a->end; 4134 } 4135 4136 bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, 4137 const struct skl_ddb_entry *ddb, 4138 int ignore) 4139 { 4140 int i; 4141 4142 for (i = 0; i < I915_MAX_PIPES; i++) 4143 if (i != ignore && entries[i] && 4144 skl_ddb_entries_overlap(ddb, entries[i])) 4145 return true; 4146 4147 return false; 4148 } 4149 4150 static int skl_update_pipe_wm(struct drm_crtc_state *cstate, 4151 const struct skl_pipe_wm *old_pipe_wm, 4152 struct skl_pipe_wm *pipe_wm, /* out */ 4153 struct skl_ddb_allocation *ddb, /* out */ 4154 bool *changed /* out */) 4155 { 4156 struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); 4157 int ret; 4158 4159 ret = skl_build_pipe_wm(intel_cstate, ddb, pipe_wm); 4160 if (ret) 4161 return ret; 4162 4163 if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) 4164 *changed = false; 4165 else 4166 *changed = true; 4167 4168 return 0; 4169 } 4170 4171 static uint32_t 4172 pipes_modified(struct drm_atomic_state *state) 4173 { 4174 struct drm_crtc *crtc; 4175 struct drm_crtc_state *cstate; 4176 uint32_t i, ret = 0; 4177 4178 for_each_new_crtc_in_state(state, crtc, cstate, i) 4179 ret |= drm_crtc_mask(crtc); 4180 4181 return ret; 4182 } 4183 4184 static int 4185 skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) 4186 { 4187 struct drm_atomic_state *state = cstate->base.state; 4188 struct drm_device *dev = state->dev; 4189 struct drm_crtc *crtc = cstate->base.crtc; 4190 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4191 struct drm_i915_private *dev_priv = to_i915(dev); 4192 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4193 struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 4194 struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; 4195 struct drm_plane_state *plane_state; 4196 struct drm_plane *plane; 4197 enum i915_pipe pipe = intel_crtc->pipe; 4198 4199 WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); 4200 4201 drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { 4202 enum plane_id plane_id = to_intel_plane(plane)->id; 4203 4204 if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], 4205 &new_ddb->plane[pipe][plane_id]) && 4206 skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], 4207 &new_ddb->y_plane[pipe][plane_id])) 4208 continue; 4209 4210 plane_state = drm_atomic_get_plane_state(state, plane); 4211 if (IS_ERR(plane_state)) 4212 return PTR_ERR(plane_state); 4213 } 4214 4215 return 0; 4216 } 4217 4218 static int 4219 skl_compute_ddb(struct drm_atomic_state *state) 4220 { 4221 struct drm_device *dev = state->dev; 4222 struct drm_i915_private *dev_priv = to_i915(dev); 4223 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4224 struct intel_crtc 
	*intel_crtc;
	struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
	uint32_t realloc_pipes = pipes_modified(state);
	int ret;

	/*
	 * If this is our first atomic update following hardware readout,
	 * we can't trust the DDB that the BIOS programmed for us. Let's
	 * pretend that all pipes switched active status so that we'll
	 * ensure a full DDB recompute.
	 */
	if (dev_priv->wm.distrust_bios_wm) {
		ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
				       state->acquire_ctx);
		if (ret)
			return ret;

		intel_state->active_pipe_changes = ~0;

		/*
		 * We usually only initialize intel_state->active_crtcs if
		 * we're doing a modeset; make sure this field is always
		 * initialized during the sanitization process that happens
		 * on the first commit too.
		 */
		if (!intel_state->modeset)
			intel_state->active_crtcs = dev_priv->active_crtcs;
	}

	/*
	 * If the modeset changes which CRTC's are active, we need to
	 * recompute the DDB allocation for *all* active pipes, even
	 * those that weren't otherwise being modified in any way by this
	 * atomic commit. Due to the shrinking of the per-pipe allocations
	 * when new active CRTC's are added, it's possible for a pipe that
	 * we were already using and aren't changing at all here to suddenly
	 * become invalid if its DDB needs exceed its new allocation.
	 *
	 * Note that if we wind up doing a full DDB recompute, we can't let
	 * any other display updates race with this transaction, so we need
	 * to grab the lock on *all* CRTC's.
	 */
	if (intel_state->active_pipe_changes) {
		realloc_pipes = ~0;
		intel_state->wm_results.dirty_pipes = ~0;
	}

	/*
	 * We're not recomputing for the pipes not included in the commit, so
	 * make sure we start with the current state.
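	 *
	 * For instance (hypothetical case): a commit that only touches
	 * pipe B, with no active-pipe changes and distrust_bios_wm clear,
	 * leaves only pipe B's bit set in realloc_pipes, so pipes A and C
	 * simply keep the entries copied from the current hardware DDB
	 * state below.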
4274 */ 4275 memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); 4276 4277 for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { 4278 struct intel_crtc_state *cstate; 4279 4280 cstate = intel_atomic_get_crtc_state(state, intel_crtc); 4281 if (IS_ERR(cstate)) 4282 return PTR_ERR(cstate); 4283 4284 ret = skl_allocate_pipe_ddb(cstate, ddb); 4285 if (ret) 4286 return ret; 4287 4288 ret = skl_ddb_add_affected_planes(cstate); 4289 if (ret) 4290 return ret; 4291 } 4292 4293 return 0; 4294 } 4295 4296 static void 4297 skl_copy_wm_for_pipe(struct skl_wm_values *dst, 4298 struct skl_wm_values *src, 4299 enum i915_pipe pipe) 4300 { 4301 memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], 4302 sizeof(dst->ddb.y_plane[pipe])); 4303 memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], 4304 sizeof(dst->ddb.plane[pipe])); 4305 } 4306 4307 static void 4308 skl_print_wm_changes(const struct drm_atomic_state *state) 4309 { 4310 const struct drm_device *dev = state->dev; 4311 const struct drm_i915_private *dev_priv = to_i915(dev); 4312 const struct intel_atomic_state *intel_state = 4313 to_intel_atomic_state(state); 4314 const struct drm_crtc *crtc; 4315 const struct drm_crtc_state *cstate; 4316 const struct intel_plane *intel_plane; 4317 const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb; 4318 const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; 4319 int i; 4320 4321 for_each_new_crtc_in_state(state, crtc, cstate, i) { 4322 const struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4323 enum i915_pipe pipe = intel_crtc->pipe; 4324 4325 for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { 4326 enum plane_id plane_id = intel_plane->id; 4327 const struct skl_ddb_entry *old, *new; 4328 4329 old = &old_ddb->plane[pipe][plane_id]; 4330 new = &new_ddb->plane[pipe][plane_id]; 4331 4332 if (skl_ddb_entry_equal(old, new)) 4333 continue; 4334 4335 DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n", 4336 intel_plane->base.base.id, 4337 intel_plane->base.name, 4338 old->start, old->end, 4339 new->start, new->end); 4340 } 4341 } 4342 } 4343 4344 static int 4345 skl_compute_wm(struct drm_atomic_state *state) 4346 { 4347 struct drm_crtc *crtc; 4348 struct drm_crtc_state *cstate; 4349 struct intel_atomic_state *intel_state = to_intel_atomic_state(state); 4350 struct skl_wm_values *results = &intel_state->wm_results; 4351 struct drm_device *dev = state->dev; 4352 struct skl_pipe_wm *pipe_wm; 4353 bool changed = false; 4354 int ret, i; 4355 4356 /* 4357 * When we distrust bios wm we always need to recompute to set the 4358 * expected DDB allocations for each CRTC. 4359 */ 4360 if (to_i915(dev)->wm.distrust_bios_wm) 4361 changed = true; 4362 4363 /* 4364 * If this transaction isn't actually touching any CRTC's, don't 4365 * bother with watermark calculation. Note that if we pass this 4366 * test, we're guaranteed to hold at least one CRTC state mutex, 4367 * which means we can safely use values like dev_priv->active_crtcs 4368 * since any racing commits that want to update them would need to 4369 * hold _all_ CRTC state mutexes. 4370 */ 4371 for_each_new_crtc_in_state(state, crtc, cstate, i) 4372 changed = true; 4373 4374 if (!changed) 4375 return 0; 4376 4377 /* Clear all dirty flags */ 4378 results->dirty_pipes = 0; 4379 4380 ret = skl_compute_ddb(state); 4381 if (ret) 4382 return ret; 4383 4384 /* 4385 * Calculate WM's for all pipes that are part of this transaction. 
4386 * Note that the DDB allocation above may have added more CRTC's that 4387 * weren't otherwise being modified (and set bits in dirty_pipes) if 4388 * pipe allocations had to change. 4389 * 4390 * FIXME: Now that we're doing this in the atomic check phase, we 4391 * should allow skl_update_pipe_wm() to return failure in cases where 4392 * no suitable watermark values can be found. 4393 */ 4394 for_each_new_crtc_in_state(state, crtc, cstate, i) { 4395 struct intel_crtc_state *intel_cstate = 4396 to_intel_crtc_state(cstate); 4397 const struct skl_pipe_wm *old_pipe_wm = 4398 &to_intel_crtc_state(crtc->state)->wm.skl.optimal; 4399 4400 pipe_wm = &intel_cstate->wm.skl.optimal; 4401 ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, 4402 &results->ddb, &changed); 4403 if (ret) 4404 return ret; 4405 4406 if (changed) 4407 results->dirty_pipes |= drm_crtc_mask(crtc); 4408 4409 if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) 4410 /* This pipe's WM's did not change */ 4411 continue; 4412 4413 intel_cstate->update_wm_pre = true; 4414 } 4415 4416 skl_print_wm_changes(state); 4417 4418 return 0; 4419 } 4420 4421 static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, 4422 struct intel_crtc_state *cstate) 4423 { 4424 struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); 4425 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 4426 struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; 4427 const struct skl_ddb_allocation *ddb = &state->wm_results.ddb; 4428 enum i915_pipe pipe = crtc->pipe; 4429 enum plane_id plane_id; 4430 4431 if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) 4432 return; 4433 4434 I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); 4435 4436 for_each_plane_id_on_crtc(crtc, plane_id) { 4437 if (plane_id != PLANE_CURSOR) 4438 skl_write_plane_wm(crtc, &pipe_wm->planes[plane_id], 4439 ddb, plane_id); 4440 else 4441 skl_write_cursor_wm(crtc, &pipe_wm->planes[plane_id], 4442 ddb); 4443 } 4444 } 4445 4446 static void skl_initial_wm(struct intel_atomic_state *state, 4447 struct intel_crtc_state *cstate) 4448 { 4449 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4450 struct drm_device *dev = intel_crtc->base.dev; 4451 struct drm_i915_private *dev_priv = to_i915(dev); 4452 struct skl_wm_values *results = &state->wm_results; 4453 struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; 4454 enum i915_pipe pipe = intel_crtc->pipe; 4455 4456 if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) 4457 return; 4458 4459 mutex_lock(&dev_priv->wm.wm_mutex); 4460 4461 if (cstate->base.active_changed) 4462 skl_atomic_update_crtc_wm(state, cstate); 4463 4464 skl_copy_wm_for_pipe(hw_vals, results, pipe); 4465 4466 mutex_unlock(&dev_priv->wm.wm_mutex); 4467 } 4468 4469 static void ilk_compute_wm_config(struct drm_device *dev, 4470 struct intel_wm_config *config) 4471 { 4472 struct intel_crtc *crtc; 4473 4474 /* Compute the currently _active_ config */ 4475 for_each_intel_crtc(dev, crtc) { 4476 const struct intel_pipe_wm *wm = &crtc->wm.active.ilk; 4477 4478 if (!wm->pipe_enabled) 4479 continue; 4480 4481 config->sprites_enabled |= wm->sprites_enabled; 4482 config->sprites_scaled |= wm->sprites_scaled; 4483 config->num_pipes_active++; 4484 } 4485 } 4486 4487 static void ilk_program_watermarks(struct drm_i915_private *dev_priv) 4488 { 4489 struct drm_device *dev = &dev_priv->drm; 4490 struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; 4491 struct ilk_wm_maximums max; 4492 struct intel_wm_config 
config = {}; 4493 struct ilk_wm_values results = {}; 4494 enum intel_ddb_partitioning partitioning; 4495 4496 ilk_compute_wm_config(dev, &config); 4497 4498 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); 4499 ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); 4500 4501 /* 5/6 split only in single pipe config on IVB+ */ 4502 if (INTEL_GEN(dev_priv) >= 7 && 4503 config.num_pipes_active == 1 && config.sprites_enabled) { 4504 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); 4505 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); 4506 4507 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); 4508 } else { 4509 best_lp_wm = &lp_wm_1_2; 4510 } 4511 4512 partitioning = (best_lp_wm == &lp_wm_1_2) ? 4513 INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; 4514 4515 ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); 4516 4517 ilk_write_wm_values(dev_priv, &results); 4518 } 4519 4520 static void ilk_initial_watermarks(struct intel_atomic_state *state, 4521 struct intel_crtc_state *cstate) 4522 { 4523 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 4524 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4525 4526 mutex_lock(&dev_priv->wm.wm_mutex); 4527 intel_crtc->wm.active.ilk = cstate->wm.ilk.intermediate; 4528 ilk_program_watermarks(dev_priv); 4529 mutex_unlock(&dev_priv->wm.wm_mutex); 4530 } 4531 4532 static void ilk_optimize_watermarks(struct intel_atomic_state *state, 4533 struct intel_crtc_state *cstate) 4534 { 4535 struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); 4536 struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); 4537 4538 mutex_lock(&dev_priv->wm.wm_mutex); 4539 if (cstate->wm.need_postvbl_update) { 4540 intel_crtc->wm.active.ilk = cstate->wm.ilk.optimal; 4541 ilk_program_watermarks(dev_priv); 4542 } 4543 mutex_unlock(&dev_priv->wm.wm_mutex); 4544 } 4545 4546 static inline void skl_wm_level_from_reg_val(uint32_t val, 4547 struct skl_wm_level *level) 4548 { 4549 level->plane_en = val & PLANE_WM_EN; 4550 level->plane_res_b = val & PLANE_WM_BLOCKS_MASK; 4551 level->plane_res_l = (val >> PLANE_WM_LINES_SHIFT) & 4552 PLANE_WM_LINES_MASK; 4553 } 4554 4555 void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, 4556 struct skl_pipe_wm *out) 4557 { 4558 struct drm_i915_private *dev_priv = to_i915(crtc->dev); 4559 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4560 enum i915_pipe pipe = intel_crtc->pipe; 4561 int level, max_level; 4562 enum plane_id plane_id; 4563 uint32_t val; 4564 4565 max_level = ilk_wm_max_level(dev_priv); 4566 4567 for_each_plane_id_on_crtc(intel_crtc, plane_id) { 4568 struct skl_plane_wm *wm = &out->planes[plane_id]; 4569 4570 for (level = 0; level <= max_level; level++) { 4571 if (plane_id != PLANE_CURSOR) 4572 val = I915_READ(PLANE_WM(pipe, plane_id, level)); 4573 else 4574 val = I915_READ(CUR_WM(pipe, level)); 4575 4576 skl_wm_level_from_reg_val(val, &wm->wm[level]); 4577 } 4578 4579 if (plane_id != PLANE_CURSOR) 4580 val = I915_READ(PLANE_WM_TRANS(pipe, plane_id)); 4581 else 4582 val = I915_READ(CUR_WM_TRANS(pipe)); 4583 4584 skl_wm_level_from_reg_val(val, &wm->trans_wm); 4585 } 4586 4587 if (!intel_crtc->active) 4588 return; 4589 4590 out->linetime = I915_READ(PIPE_WM_LINETIME(pipe)); 4591 } 4592 4593 void skl_wm_get_hw_state(struct drm_device *dev) 4594 { 4595 struct drm_i915_private *dev_priv = to_i915(dev); 4596 struct skl_wm_values *hw = &dev_priv->wm.skl_hw; 4597 struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; 4598 struct drm_crtc *crtc; 
4599 struct intel_crtc *intel_crtc; 4600 struct intel_crtc_state *cstate; 4601 4602 skl_ddb_get_hw_state(dev_priv, ddb); 4603 list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { 4604 intel_crtc = to_intel_crtc(crtc); 4605 cstate = to_intel_crtc_state(crtc->state); 4606 4607 skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal); 4608 4609 if (intel_crtc->active) 4610 hw->dirty_pipes |= drm_crtc_mask(crtc); 4611 } 4612 4613 if (dev_priv->active_crtcs) { 4614 /* Fully recompute DDB on first atomic commit */ 4615 dev_priv->wm.distrust_bios_wm = true; 4616 } else { 4617 /* Easy/common case; just sanitize DDB now if everything off */ 4618 memset(ddb, 0, sizeof(*ddb)); 4619 } 4620 } 4621 4622 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) 4623 { 4624 struct drm_device *dev = crtc->dev; 4625 struct drm_i915_private *dev_priv = to_i915(dev); 4626 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4627 struct intel_crtc *intel_crtc = to_intel_crtc(crtc); 4628 struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); 4629 struct intel_pipe_wm *active = &cstate->wm.ilk.optimal; 4630 enum i915_pipe pipe = intel_crtc->pipe; 4631 static const i915_reg_t wm0_pipe_reg[] = { 4632 [PIPE_A] = WM0_PIPEA_ILK, 4633 [PIPE_B] = WM0_PIPEB_ILK, 4634 [PIPE_C] = WM0_PIPEC_IVB, 4635 }; 4636 4637 hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); 4638 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 4639 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); 4640 4641 memset(active, 0, sizeof(*active)); 4642 4643 active->pipe_enabled = intel_crtc->active; 4644 4645 if (active->pipe_enabled) { 4646 u32 tmp = hw->wm_pipe[pipe]; 4647 4648 /* 4649 * For active pipes LP0 watermark is marked as 4650 * enabled, and LP1+ watermaks as disabled since 4651 * we can't really reverse compute them in case 4652 * multiple pipes are active. 4653 */ 4654 active->wm[0].enable = true; 4655 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; 4656 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; 4657 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; 4658 active->linetime = hw->wm_linetime[pipe]; 4659 } else { 4660 int level, max_level = ilk_wm_max_level(dev_priv); 4661 4662 /* 4663 * For inactive pipes, all watermark levels 4664 * should be marked as enabled but zeroed, 4665 * which is what we'd compute them to. 
4666 */ 4667 for (level = 0; level <= max_level; level++) 4668 active->wm[level].enable = true; 4669 } 4670 4671 intel_crtc->wm.active.ilk = *active; 4672 } 4673 4674 #define _FW_WM(value, plane) \ 4675 (((value) & DSPFW_ ## plane ## _MASK) >> DSPFW_ ## plane ## _SHIFT) 4676 #define _FW_WM_VLV(value, plane) \ 4677 (((value) & DSPFW_ ## plane ## _MASK_VLV) >> DSPFW_ ## plane ## _SHIFT) 4678 4679 static void vlv_read_wm_values(struct drm_i915_private *dev_priv, 4680 struct vlv_wm_values *wm) 4681 { 4682 enum i915_pipe pipe; 4683 uint32_t tmp; 4684 4685 for_each_pipe(dev_priv, pipe) { 4686 tmp = I915_READ(VLV_DDL(pipe)); 4687 4688 wm->ddl[pipe].plane[PLANE_PRIMARY] = 4689 (tmp >> DDL_PLANE_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4690 wm->ddl[pipe].plane[PLANE_CURSOR] = 4691 (tmp >> DDL_CURSOR_SHIFT) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4692 wm->ddl[pipe].plane[PLANE_SPRITE0] = 4693 (tmp >> DDL_SPRITE_SHIFT(0)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4694 wm->ddl[pipe].plane[PLANE_SPRITE1] = 4695 (tmp >> DDL_SPRITE_SHIFT(1)) & (DDL_PRECISION_HIGH | DRAIN_LATENCY_MASK); 4696 } 4697 4698 tmp = I915_READ(DSPFW1); 4699 wm->sr.plane = _FW_WM(tmp, SR); 4700 wm->pipe[PIPE_B].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORB); 4701 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEB); 4702 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEA); 4703 4704 tmp = I915_READ(DSPFW2); 4705 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEB); 4706 wm->pipe[PIPE_A].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORA); 4707 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEA); 4708 4709 tmp = I915_READ(DSPFW3); 4710 wm->sr.cursor = _FW_WM(tmp, CURSOR_SR); 4711 4712 if (IS_CHERRYVIEW(dev_priv)) { 4713 tmp = I915_READ(DSPFW7_CHV); 4714 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED); 4715 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC); 4716 4717 tmp = I915_READ(DSPFW8_CHV); 4718 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITEF); 4719 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEE); 4720 4721 tmp = I915_READ(DSPFW9_CHV); 4722 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] = _FW_WM_VLV(tmp, PLANEC); 4723 wm->pipe[PIPE_C].plane[PLANE_CURSOR] = _FW_WM(tmp, CURSORC); 4724 4725 tmp = I915_READ(DSPHOWM); 4726 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4727 wm->pipe[PIPE_C].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEF_HI) << 8; 4728 wm->pipe[PIPE_C].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEE_HI) << 8; 4729 wm->pipe[PIPE_C].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEC_HI) << 8; 4730 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4731 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4732 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8; 4733 wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4734 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4735 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8; 4736 } else { 4737 tmp = I915_READ(DSPFW7); 4738 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] = _FW_WM_VLV(tmp, SPRITED); 4739 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] = _FW_WM_VLV(tmp, SPRITEC); 4740 4741 tmp = I915_READ(DSPHOWM); 4742 wm->sr.plane |= _FW_WM(tmp, SR_HI) << 9; 4743 wm->pipe[PIPE_B].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITED_HI) << 8; 4744 wm->pipe[PIPE_B].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEC_HI) << 8; 4745 wm->pipe[PIPE_B].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEB_HI) << 8; 4746 
wm->pipe[PIPE_A].plane[PLANE_SPRITE1] |= _FW_WM(tmp, SPRITEB_HI) << 8; 4747 wm->pipe[PIPE_A].plane[PLANE_SPRITE0] |= _FW_WM(tmp, SPRITEA_HI) << 8; 4748 wm->pipe[PIPE_A].plane[PLANE_PRIMARY] |= _FW_WM(tmp, PLANEA_HI) << 8; 4749 } 4750 } 4751 4752 #undef _FW_WM 4753 #undef _FW_WM_VLV 4754 4755 void vlv_wm_get_hw_state(struct drm_device *dev) 4756 { 4757 struct drm_i915_private *dev_priv = to_i915(dev); 4758 struct vlv_wm_values *wm = &dev_priv->wm.vlv; 4759 struct intel_crtc *crtc; 4760 u32 val; 4761 4762 vlv_read_wm_values(dev_priv, wm); 4763 4764 wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; 4765 wm->level = VLV_WM_LEVEL_PM2; 4766 4767 if (IS_CHERRYVIEW(dev_priv)) { 4768 mutex_lock(&dev_priv->rps.hw_lock); 4769 4770 val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); 4771 if (val & DSP_MAXFIFO_PM5_ENABLE) 4772 wm->level = VLV_WM_LEVEL_PM5; 4773 4774 /* 4775 * If DDR DVFS is disabled in the BIOS, Punit 4776 * will never ack the request. So if that happens 4777 * assume we don't have to enable/disable DDR DVFS 4778 * dynamically. To test that just set the REQ_ACK 4779 * bit to poke the Punit, but don't change the 4780 * HIGH/LOW bits so that we don't actually change 4781 * the current state. 4782 */ 4783 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4784 val |= FORCE_DDR_FREQ_REQ_ACK; 4785 vlv_punit_write(dev_priv, PUNIT_REG_DDR_SETUP2, val); 4786 4787 if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2) & 4788 FORCE_DDR_FREQ_REQ_ACK) == 0, 3)) { 4789 DRM_DEBUG_KMS("Punit not acking DDR DVFS request, " 4790 "assuming DDR DVFS is disabled\n"); 4791 dev_priv->wm.max_level = VLV_WM_LEVEL_PM5; 4792 } else { 4793 val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2); 4794 if ((val & FORCE_DDR_HIGH_FREQ) == 0) 4795 wm->level = VLV_WM_LEVEL_DDR_DVFS; 4796 } 4797 4798 mutex_unlock(&dev_priv->rps.hw_lock); 4799 } 4800 4801 for_each_intel_crtc(dev, crtc) { 4802 struct intel_crtc_state *crtc_state = 4803 to_intel_crtc_state(crtc->base.state); 4804 struct vlv_wm_state *active = &crtc->wm.active.vlv; 4805 const struct vlv_fifo_state *fifo_state = 4806 &crtc_state->wm.vlv.fifo_state; 4807 enum i915_pipe pipe = crtc->pipe; 4808 enum plane_id plane_id; 4809 int level; 4810 4811 vlv_get_fifo_size(crtc_state); 4812 4813 active->num_levels = wm->level + 1; 4814 active->cxsr = wm->cxsr; 4815 4816 for (level = 0; level < active->num_levels; level++) { 4817 struct vlv_pipe_wm *raw = 4818 &crtc_state->wm.vlv.raw[level]; 4819 4820 active->sr[level].plane = wm->sr.plane; 4821 active->sr[level].cursor = wm->sr.cursor; 4822 4823 for_each_plane_id_on_crtc(crtc, plane_id) { 4824 active->wm[level].plane[plane_id] = 4825 wm->pipe[pipe].plane[plane_id]; 4826 4827 raw->plane[plane_id] = 4828 vlv_invert_wm_value(active->wm[level].plane[plane_id], 4829 fifo_state->plane[plane_id]); 4830 } 4831 } 4832 4833 for_each_plane_id_on_crtc(crtc, plane_id) 4834 vlv_raw_plane_wm_set(crtc_state, level, 4835 plane_id, USHRT_MAX); 4836 vlv_invalidate_wms(crtc, active, level); 4837 4838 crtc_state->wm.vlv.optimal = *active; 4839 crtc_state->wm.vlv.intermediate = *active; 4840 4841 DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", 4842 pipe_name(pipe), 4843 wm->pipe[pipe].plane[PLANE_PRIMARY], 4844 wm->pipe[pipe].plane[PLANE_CURSOR], 4845 wm->pipe[pipe].plane[PLANE_SPRITE0], 4846 wm->pipe[pipe].plane[PLANE_SPRITE1]); 4847 } 4848 4849 DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", 4850 wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); 4851 } 
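/*
 * Note: vlv_wm_get_hw_state() above seeds each CRTC's optimal/intermediate
 * watermark state straight from the registers; vlv_wm_sanitize() below then
 * zeroes the raw watermarks of planes that are not visible, so the first
 * real commit starts from a consistent software state.
 */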
4852 4853 void vlv_wm_sanitize(struct drm_i915_private *dev_priv) 4854 { 4855 struct intel_plane *plane; 4856 struct intel_crtc *crtc; 4857 4858 mutex_lock(&dev_priv->wm.wm_mutex); 4859 4860 for_each_intel_plane(&dev_priv->drm, plane) { 4861 struct intel_crtc *crtc = 4862 intel_get_crtc_for_pipe(dev_priv, plane->pipe); 4863 struct intel_crtc_state *crtc_state = 4864 to_intel_crtc_state(crtc->base.state); 4865 struct intel_plane_state *plane_state = 4866 to_intel_plane_state(plane->base.state); 4867 struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; 4868 const struct vlv_fifo_state *fifo_state = 4869 &crtc_state->wm.vlv.fifo_state; 4870 enum plane_id plane_id = plane->id; 4871 int level; 4872 4873 if (plane_state->base.visible) 4874 continue; 4875 4876 for (level = 0; level < wm_state->num_levels; level++) { 4877 struct vlv_pipe_wm *raw = 4878 &crtc_state->wm.vlv.raw[level]; 4879 4880 raw->plane[plane_id] = 0; 4881 4882 wm_state->wm[level].plane[plane_id] = 4883 vlv_invert_wm_value(raw->plane[plane_id], 4884 fifo_state->plane[plane_id]); 4885 } 4886 } 4887 4888 for_each_intel_crtc(&dev_priv->drm, crtc) { 4889 struct intel_crtc_state *crtc_state = 4890 to_intel_crtc_state(crtc->base.state); 4891 4892 crtc_state->wm.vlv.intermediate = 4893 crtc_state->wm.vlv.optimal; 4894 crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; 4895 } 4896 4897 vlv_program_watermarks(dev_priv); 4898 4899 mutex_unlock(&dev_priv->wm.wm_mutex); 4900 } 4901 4902 void ilk_wm_get_hw_state(struct drm_device *dev) 4903 { 4904 struct drm_i915_private *dev_priv = to_i915(dev); 4905 struct ilk_wm_values *hw = &dev_priv->wm.hw; 4906 struct drm_crtc *crtc; 4907 4908 for_each_crtc(dev, crtc) 4909 ilk_pipe_wm_get_hw_state(crtc); 4910 4911 hw->wm_lp[0] = I915_READ(WM1_LP_ILK); 4912 hw->wm_lp[1] = I915_READ(WM2_LP_ILK); 4913 hw->wm_lp[2] = I915_READ(WM3_LP_ILK); 4914 4915 hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); 4916 if (INTEL_GEN(dev_priv) >= 7) { 4917 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); 4918 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); 4919 } 4920 4921 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 4922 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 4923 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4924 else if (IS_IVYBRIDGE(dev_priv)) 4925 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? 4926 INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; 4927 4928 hw->enable_fbc_wm = 4929 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); 4930 } 4931 4932 /** 4933 * intel_update_watermarks - update FIFO watermark values based on current modes 4934 * 4935 * Calculate watermark values for the various WM regs based on current mode 4936 * and plane configuration. 4937 * 4938 * There are several cases to deal with here: 4939 * - normal (i.e. non-self-refresh) 4940 * - self-refresh (SR) mode 4941 * - lines are large relative to FIFO size (buffer can hold up to 2) 4942 * - lines are small relative to FIFO size (buffer can hold more than 2 4943 * lines), so need to account for TLB latency 4944 * 4945 * The normal calculation is: 4946 * watermark = dotclock * bytes per pixel * latency 4947 * where latency is platform & configuration dependent (we assume pessimal 4948 * values here). 4949 * 4950 * The SR calculation is: 4951 * watermark = (trunc(latency/line time)+1) * surface width * 4952 * bytes per pixel 4953 * where 4954 * line time = htotal / dotclock 4955 * surface width = hdisplay for normal plane and 64 for cursor 4956 * and latency is assumed to be high, as above. 
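 *
 * Worked SR example (hypothetical numbers): with a 148.5 MHz dotclock and
 * htotal = 2200 the line time is ~14.8 us; assuming a 30 us latency and a
 * 1920 pixel wide, 4 bytes per pixel plane, the SR watermark comes to
 *   (trunc(30 / 14.8) + 1) * 1920 * 4 = 3 * 7680 = 23040 bytes.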
4957 * 4958 * The final value programmed to the register should always be rounded up, 4959 * and include an extra 2 entries to account for clock crossings. 4960 * 4961 * We don't use the sprite, so we can ignore that. And on Crestline we have 4962 * to set the non-SR watermarks to 8. 4963 */ 4964 void intel_update_watermarks(struct intel_crtc *crtc) 4965 { 4966 struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); 4967 4968 if (dev_priv->display.update_wm) 4969 dev_priv->display.update_wm(crtc); 4970 } 4971 4972 /* 4973 * Lock protecting IPS related data structures 4974 */ 4975 DEFINE_SPINLOCK(mchdev_lock); 4976 4977 /* Global for IPS driver to get at the current i915 device. Protected by 4978 * mchdev_lock. */ 4979 static struct drm_i915_private *i915_mch_dev; 4980 4981 bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) 4982 { 4983 u16 rgvswctl; 4984 4985 lockdep_assert_held(&mchdev_lock); 4986 4987 rgvswctl = I915_READ16(MEMSWCTL); 4988 if (rgvswctl & MEMCTL_CMD_STS) { 4989 DRM_DEBUG("gpu busy, RCS change rejected\n"); 4990 return false; /* still busy with another command */ 4991 } 4992 4993 rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | 4994 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; 4995 I915_WRITE16(MEMSWCTL, rgvswctl); 4996 POSTING_READ16(MEMSWCTL); 4997 4998 rgvswctl |= MEMCTL_CMD_STS; 4999 I915_WRITE16(MEMSWCTL, rgvswctl); 5000 5001 return true; 5002 } 5003 5004 static void ironlake_enable_drps(struct drm_i915_private *dev_priv) 5005 { 5006 u32 rgvmodectl; 5007 u8 fmax, fmin, fstart, vstart; 5008 5009 spin_lock_irq(&mchdev_lock); 5010 5011 rgvmodectl = I915_READ(MEMMODECTL); 5012 5013 /* Enable temp reporting */ 5014 I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); 5015 I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); 5016 5017 /* 100ms RC evaluation intervals */ 5018 I915_WRITE(RCUPEI, 100000); 5019 I915_WRITE(RCDNEI, 100000); 5020 5021 /* Set max/min thresholds to 90ms and 80ms respectively */ 5022 I915_WRITE(RCBMAXAVG, 90000); 5023 I915_WRITE(RCBMINAVG, 80000); 5024 5025 I915_WRITE(MEMIHYST, 1); 5026 5027 /* Set up min, max, and cur for interrupt handling */ 5028 fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; 5029 fmin = (rgvmodectl & MEMMODE_FMIN_MASK); 5030 fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> 5031 MEMMODE_FSTART_SHIFT; 5032 5033 vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> 5034 PXVFREQ_PX_SHIFT; 5035 5036 dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ 5037 dev_priv->ips.fstart = fstart; 5038 5039 dev_priv->ips.max_delay = fstart; 5040 dev_priv->ips.min_delay = fmin; 5041 dev_priv->ips.cur_delay = fstart; 5042 5043 DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", 5044 fmax, fmin, fstart); 5045 5046 I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); 5047 5048 /* 5049 * Interrupts will be enabled in ironlake_irq_postinstall 5050 */ 5051 5052 I915_WRITE(VIDSTART, vstart); 5053 POSTING_READ(VIDSTART); 5054 5055 rgvmodectl |= MEMMODE_SWMODE_EN; 5056 I915_WRITE(MEMMODECTL, rgvmodectl); 5057 5058 if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) 5059 DRM_ERROR("stuck trying to change perf mode\n"); 5060 mdelay(1); 5061 5062 ironlake_set_drps(dev_priv, fstart); 5063 5064 dev_priv->ips.last_count1 = I915_READ(DMIEC) + 5065 I915_READ(DDREC) + I915_READ(CSIEC); 5066 dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); 5067 dev_priv->ips.last_count2 = I915_READ(GFXEC); 5068 dev_priv->ips.last_time2 = ktime_get_raw_ns(); 5069 5070 spin_unlock_irq(&mchdev_lock); 5071 } 
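/*
 * Illustrative sketch only (not called anywhere in the driver): the same
 * MEMMODECTL field decode that ironlake_enable_drps() performs above, pulled
 * out as a self-contained reference for how the min/max/start frequency bins
 * are derived from the register value. The fstart bin derived this way is
 * what ironlake_enable_drps() uses for ips.max_delay/cur_delay and passes to
 * ironlake_set_drps().
 */
static inline void ironlake_drps_decode_sketch(u32 rgvmodectl,
					       u8 *fmax, u8 *fmin, u8 *fstart)
{
	*fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
	*fmin = rgvmodectl & MEMMODE_FMIN_MASK;
	*fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT;
}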
5072 5073 static void ironlake_disable_drps(struct drm_i915_private *dev_priv) 5074 { 5075 u16 rgvswctl; 5076 5077 spin_lock_irq(&mchdev_lock); 5078 5079 rgvswctl = I915_READ16(MEMSWCTL); 5080 5081 /* Ack interrupts, disable EFC interrupt */ 5082 I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); 5083 I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); 5084 I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); 5085 I915_WRITE(DEIIR, DE_PCU_EVENT); 5086 I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); 5087 5088 /* Go back to the starting frequency */ 5089 ironlake_set_drps(dev_priv, dev_priv->ips.fstart); 5090 mdelay(1); 5091 rgvswctl |= MEMCTL_CMD_STS; 5092 I915_WRITE(MEMSWCTL, rgvswctl); 5093 mdelay(1); 5094 5095 spin_unlock_irq(&mchdev_lock); 5096 } 5097 5098 /* There's a funny hw issue where the hw returns all 0 when reading from 5099 * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value 5100 * ourselves, instead of doing a rmw cycle (which might result in us clearing 5101 * all limits and the gpu stuck at whatever frequency it is at atm). 5102 */ 5103 static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) 5104 { 5105 u32 limits; 5106 5107 /* Only set the down limit when we've reached the lowest level to avoid 5108 * getting more interrupts, otherwise leave this clear. This prevents a 5109 * race in the hw when coming out of rc6: There's a tiny window where 5110 * the hw runs at the minimal clock before selecting the desired 5111 * frequency, if the down threshold expires in that window we will not 5112 * receive a down interrupt. */ 5113 if (IS_GEN9(dev_priv)) { 5114 limits = (dev_priv->rps.max_freq_softlimit) << 23; 5115 if (val <= dev_priv->rps.min_freq_softlimit) 5116 limits |= (dev_priv->rps.min_freq_softlimit) << 14; 5117 } else { 5118 limits = dev_priv->rps.max_freq_softlimit << 24; 5119 if (val <= dev_priv->rps.min_freq_softlimit) 5120 limits |= dev_priv->rps.min_freq_softlimit << 16; 5121 } 5122 5123 return limits; 5124 } 5125 5126 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) 5127 { 5128 int new_power; 5129 u32 threshold_up = 0, threshold_down = 0; /* in % */ 5130 u32 ei_up = 0, ei_down = 0; 5131 5132 new_power = dev_priv->rps.power; 5133 switch (dev_priv->rps.power) { 5134 case LOW_POWER: 5135 if (val > dev_priv->rps.efficient_freq + 1 && 5136 val > dev_priv->rps.cur_freq) 5137 new_power = BETWEEN; 5138 break; 5139 5140 case BETWEEN: 5141 if (val <= dev_priv->rps.efficient_freq && 5142 val < dev_priv->rps.cur_freq) 5143 new_power = LOW_POWER; 5144 else if (val >= dev_priv->rps.rp0_freq && 5145 val > dev_priv->rps.cur_freq) 5146 new_power = HIGH_POWER; 5147 break; 5148 5149 case HIGH_POWER: 5150 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && 5151 val < dev_priv->rps.cur_freq) 5152 new_power = BETWEEN; 5153 break; 5154 } 5155 /* Max/min bins are special */ 5156 if (val <= dev_priv->rps.min_freq_softlimit) 5157 new_power = LOW_POWER; 5158 if (val >= dev_priv->rps.max_freq_softlimit) 5159 new_power = HIGH_POWER; 5160 if (new_power == dev_priv->rps.power) 5161 return; 5162 5163 /* Note the units here are not exactly 1us, but 1280ns. 
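	 * In those units a nominal 16 ms evaluation interval corresponds to
	 * roughly 16,000,000 ns / 1280 ns = 12500 counter ticks, which is
	 * the kind of translation GT_INTERVAL_FROM_US() has to perform on
	 * the microsecond values used below.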
*/ 5164 switch (new_power) { 5165 case LOW_POWER: 5166 /* Upclock if more than 95% busy over 16ms */ 5167 ei_up = 16000; 5168 threshold_up = 95; 5169 5170 /* Downclock if less than 85% busy over 32ms */ 5171 ei_down = 32000; 5172 threshold_down = 85; 5173 break; 5174 5175 case BETWEEN: 5176 /* Upclock if more than 90% busy over 13ms */ 5177 ei_up = 13000; 5178 threshold_up = 90; 5179 5180 /* Downclock if less than 75% busy over 32ms */ 5181 ei_down = 32000; 5182 threshold_down = 75; 5183 break; 5184 5185 case HIGH_POWER: 5186 /* Upclock if more than 85% busy over 10ms */ 5187 ei_up = 10000; 5188 threshold_up = 85; 5189 5190 /* Downclock if less than 60% busy over 32ms */ 5191 ei_down = 32000; 5192 threshold_down = 60; 5193 break; 5194 } 5195 5196 /* When byt can survive without system hang with dynamic 5197 * sw freq adjustments, this restriction can be lifted. 5198 */ 5199 if (IS_VALLEYVIEW(dev_priv)) 5200 goto skip_hw_write; 5201 5202 I915_WRITE(GEN6_RP_UP_EI, 5203 GT_INTERVAL_FROM_US(dev_priv, ei_up)); 5204 I915_WRITE(GEN6_RP_UP_THRESHOLD, 5205 GT_INTERVAL_FROM_US(dev_priv, 5206 ei_up * threshold_up / 100)); 5207 5208 I915_WRITE(GEN6_RP_DOWN_EI, 5209 GT_INTERVAL_FROM_US(dev_priv, ei_down)); 5210 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 5211 GT_INTERVAL_FROM_US(dev_priv, 5212 ei_down * threshold_down / 100)); 5213 5214 I915_WRITE(GEN6_RP_CONTROL, 5215 GEN6_RP_MEDIA_TURBO | 5216 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5217 GEN6_RP_MEDIA_IS_GFX | 5218 GEN6_RP_ENABLE | 5219 GEN6_RP_UP_BUSY_AVG | 5220 GEN6_RP_DOWN_IDLE_AVG); 5221 5222 skip_hw_write: 5223 dev_priv->rps.power = new_power; 5224 dev_priv->rps.up_threshold = threshold_up; 5225 dev_priv->rps.down_threshold = threshold_down; 5226 dev_priv->rps.last_adj = 0; 5227 } 5228 5229 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) 5230 { 5231 u32 mask = 0; 5232 5233 /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ 5234 if (val > dev_priv->rps.min_freq_softlimit) 5235 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; 5236 if (val < dev_priv->rps.max_freq_softlimit) 5237 mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; 5238 5239 mask &= dev_priv->pm_rps_events; 5240 5241 return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); 5242 } 5243 5244 /* gen6_set_rps is called to update the frequency request, but should also be 5245 * called when the range (min_delay and max_delay) is modified so that we can 5246 * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ 5247 static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) 5248 { 5249 /* min/max delay may still have been modified so be sure to 5250 * write the limits value. 5251 */ 5252 if (val != dev_priv->rps.cur_freq) { 5253 gen6_set_rps_thresholds(dev_priv, val); 5254 5255 if (IS_GEN9(dev_priv)) 5256 I915_WRITE(GEN6_RPNSWREQ, 5257 GEN9_FREQUENCY(val)); 5258 else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) 5259 I915_WRITE(GEN6_RPNSWREQ, 5260 HSW_FREQUENCY(val)); 5261 else 5262 I915_WRITE(GEN6_RPNSWREQ, 5263 GEN6_FREQUENCY(val) | 5264 GEN6_OFFSET(0) | 5265 GEN6_AGGRESSIVE_TURBO); 5266 } 5267 5268 /* Make sure we continue to get interrupts 5269 * until we hit the minimum or maximum frequencies. 
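	 * Example with made-up softlimits of min = 5 and max = 22: while
	 * the requested frequency is above the minimum, intel_rps_limits()
	 * packs only the up limit (22) into GEN6_RP_INTERRUPT_LIMITS and
	 * leaves the down limit clear so down-threshold interrupts keep
	 * arriving; once the request reaches 5 the down limit is packed in
	 * as well, suppressing further pointless interrupts.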
5270 */ 5271 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); 5272 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 5273 5274 dev_priv->rps.cur_freq = val; 5275 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 5276 5277 return 0; 5278 } 5279 5280 static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) 5281 { 5282 int err; 5283 5284 if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), 5285 "Odd GPU freq value\n")) 5286 val &= ~1; 5287 5288 I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); 5289 5290 if (val != dev_priv->rps.cur_freq) { 5291 err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); 5292 if (err) 5293 return err; 5294 5295 gen6_set_rps_thresholds(dev_priv, val); 5296 } 5297 5298 dev_priv->rps.cur_freq = val; 5299 trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); 5300 5301 return 0; 5302 } 5303 5304 /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down 5305 * 5306 * * If Gfx is Idle, then 5307 * 1. Forcewake Media well. 5308 * 2. Request idle freq. 5309 * 3. Release Forcewake of Media well. 5310 */ 5311 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) 5312 { 5313 u32 val = dev_priv->rps.idle_freq; 5314 int err; 5315 5316 if (dev_priv->rps.cur_freq <= val) 5317 return; 5318 5319 /* The punit delays the write of the frequency and voltage until it 5320 * determines the GPU is awake. During normal usage we don't want to 5321 * waste power changing the frequency if the GPU is sleeping (rc6). 5322 * However, the GPU and driver is now idle and we do not want to delay 5323 * switching to minimum voltage (reducing power whilst idle) as we do 5324 * not expect to be woken in the near future and so must flush the 5325 * change by waking the device. 5326 * 5327 * We choose to take the media powerwell (either would do to trick the 5328 * punit into committing the voltage change) as that takes a lot less 5329 * power than the render powerwell. 5330 */ 5331 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); 5332 err = valleyview_set_rps(dev_priv, val); 5333 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); 5334 5335 if (err) 5336 DRM_ERROR("Failed to set RPS for idle\n"); 5337 } 5338 5339 void gen6_rps_busy(struct drm_i915_private *dev_priv) 5340 { 5341 mutex_lock(&dev_priv->rps.hw_lock); 5342 if (dev_priv->rps.enabled) { 5343 u8 freq; 5344 5345 if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) 5346 gen6_rps_reset_ei(dev_priv); 5347 I915_WRITE(GEN6_PMINTRMSK, 5348 gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); 5349 5350 gen6_enable_rps_interrupts(dev_priv); 5351 5352 /* Use the user's desired frequency as a guide, but for better 5353 * performance, jump directly to RPe as our starting frequency. 5354 */ 5355 freq = max(dev_priv->rps.cur_freq, 5356 dev_priv->rps.efficient_freq); 5357 5358 if (intel_set_rps(dev_priv, 5359 clamp(freq, 5360 dev_priv->rps.min_freq_softlimit, 5361 dev_priv->rps.max_freq_softlimit))) 5362 DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); 5363 } 5364 mutex_unlock(&dev_priv->rps.hw_lock); 5365 } 5366 5367 void gen6_rps_idle(struct drm_i915_private *dev_priv) 5368 { 5369 /* Flush our bottom-half so that it does not race with us 5370 * setting the idle frequency and so that it is bounded by 5371 * our rpm wakeref. And then disable the interrupts to stop any 5372 * futher RPS reclocking whilst we are asleep. 
5373 */ 5374 gen6_disable_rps_interrupts(dev_priv); 5375 5376 mutex_lock(&dev_priv->rps.hw_lock); 5377 if (dev_priv->rps.enabled) { 5378 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 5379 vlv_set_rps_idle(dev_priv); 5380 else 5381 gen6_set_rps(dev_priv, dev_priv->rps.idle_freq); 5382 dev_priv->rps.last_adj = 0; 5383 I915_WRITE(GEN6_PMINTRMSK, 5384 gen6_sanitize_rps_pm_mask(dev_priv, ~0)); 5385 } 5386 mutex_unlock(&dev_priv->rps.hw_lock); 5387 5388 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 5389 while (!list_empty(&dev_priv->rps.clients)) 5390 list_del_init(dev_priv->rps.clients.next); 5391 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 5392 } 5393 5394 void gen6_rps_boost(struct drm_i915_private *dev_priv, 5395 struct intel_rps_client *rps, 5396 unsigned long submitted) 5397 { 5398 /* This is intentionally racy! We peek at the state here, then 5399 * validate inside the RPS worker. 5400 */ 5401 if (!(dev_priv->gt.awake && 5402 dev_priv->rps.enabled && 5403 dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) 5404 return; 5405 5406 /* Force a RPS boost (and don't count it against the client) if 5407 * the GPU is severely congested. 5408 */ 5409 if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) 5410 rps = NULL; 5411 5412 lockmgr(&dev_priv->rps.client_lock, LK_EXCLUSIVE); 5413 if (rps == NULL || list_empty(&rps->link)) { 5414 spin_lock_irq(&dev_priv->irq_lock); 5415 if (dev_priv->rps.interrupts_enabled) { 5416 dev_priv->rps.client_boost = true; 5417 schedule_work(&dev_priv->rps.work); 5418 } 5419 spin_unlock_irq(&dev_priv->irq_lock); 5420 5421 if (rps != NULL) { 5422 list_add(&rps->link, &dev_priv->rps.clients); 5423 rps->boosts++; 5424 } else 5425 dev_priv->rps.boosts++; 5426 } 5427 lockmgr(&dev_priv->rps.client_lock, LK_RELEASE); 5428 } 5429 5430 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) 5431 { 5432 int err; 5433 5434 lockdep_assert_held(&dev_priv->rps.hw_lock); 5435 GEM_BUG_ON(val > dev_priv->rps.max_freq); 5436 GEM_BUG_ON(val < dev_priv->rps.min_freq); 5437 5438 if (!dev_priv->rps.enabled) { 5439 dev_priv->rps.cur_freq = val; 5440 return 0; 5441 } 5442 5443 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) 5444 err = valleyview_set_rps(dev_priv, val); 5445 else 5446 err = gen6_set_rps(dev_priv, val); 5447 5448 return err; 5449 } 5450 5451 static void gen9_disable_rc6(struct drm_i915_private *dev_priv) 5452 { 5453 I915_WRITE(GEN6_RC_CONTROL, 0); 5454 I915_WRITE(GEN9_PG_ENABLE, 0); 5455 } 5456 5457 static void gen9_disable_rps(struct drm_i915_private *dev_priv) 5458 { 5459 I915_WRITE(GEN6_RP_CONTROL, 0); 5460 } 5461 5462 static void gen6_disable_rps(struct drm_i915_private *dev_priv) 5463 { 5464 I915_WRITE(GEN6_RC_CONTROL, 0); 5465 I915_WRITE(GEN6_RPNSWREQ, 1 << 31); 5466 I915_WRITE(GEN6_RP_CONTROL, 0); 5467 } 5468 5469 static void cherryview_disable_rps(struct drm_i915_private *dev_priv) 5470 { 5471 I915_WRITE(GEN6_RC_CONTROL, 0); 5472 } 5473 5474 static void valleyview_disable_rps(struct drm_i915_private *dev_priv) 5475 { 5476 /* we're doing forcewake before Disabling RC6, 5477 * This what the BIOS expects when going into suspend */ 5478 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5479 5480 I915_WRITE(GEN6_RC_CONTROL, 0); 5481 5482 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5483 } 5484 5485 static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode) 5486 { 5487 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 5488 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) 5489 
mode = GEN6_RC_CTL_RC6_ENABLE; 5490 else 5491 mode = 0; 5492 } 5493 if (HAS_RC6p(dev_priv)) 5494 DRM_DEBUG_DRIVER("Enabling RC6 states: " 5495 "RC6 %s RC6p %s RC6pp %s\n", 5496 onoff(mode & GEN6_RC_CTL_RC6_ENABLE), 5497 onoff(mode & GEN6_RC_CTL_RC6p_ENABLE), 5498 onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE)); 5499 5500 else 5501 DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n", 5502 onoff(mode & GEN6_RC_CTL_RC6_ENABLE)); 5503 } 5504 5505 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) 5506 { 5507 struct i915_ggtt *ggtt = &dev_priv->ggtt; 5508 bool enable_rc6 = true; 5509 unsigned long rc6_ctx_base; 5510 u32 rc_ctl; 5511 int rc_sw_target; 5512 5513 rc_ctl = I915_READ(GEN6_RC_CONTROL); 5514 rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> 5515 RC_SW_TARGET_STATE_SHIFT; 5516 DRM_DEBUG_DRIVER("BIOS enabled RC states: " 5517 "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", 5518 onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), 5519 onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), 5520 rc_sw_target); 5521 5522 if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { 5523 DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); 5524 enable_rc6 = false; 5525 } 5526 5527 /* 5528 * The exact context size is not known for BXT, so assume a page size 5529 * for this check. 5530 */ 5531 rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; 5532 if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && 5533 (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + 5534 ggtt->stolen_reserved_size))) { 5535 DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); 5536 enable_rc6 = false; 5537 } 5538 5539 if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && 5540 ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && 5541 ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && 5542 ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { 5543 DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); 5544 enable_rc6 = false; 5545 } 5546 5547 if (!I915_READ(GEN8_PUSHBUS_CONTROL) || 5548 !I915_READ(GEN8_PUSHBUS_ENABLE) || 5549 !I915_READ(GEN8_PUSHBUS_SHIFT)) { 5550 DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); 5551 enable_rc6 = false; 5552 } 5553 5554 if (!I915_READ(GEN6_GFXPAUSE)) { 5555 DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); 5556 enable_rc6 = false; 5557 } 5558 5559 if (!I915_READ(GEN8_MISC_CTRL0)) { 5560 DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); 5561 enable_rc6 = false; 5562 } 5563 5564 return enable_rc6; 5565 } 5566 5567 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6) 5568 { 5569 /* No RC6 before Ironlake and code is gone for ilk. 
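 *
 * For example (illustrative): a request of enable_rc6=7 on hardware
 * without RC6p support is reduced by the mask below to
 * INTEL_RC6_ENABLE (1), and the adjustment is noted in the debug log.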
*/ 5570 if (INTEL_INFO(dev_priv)->gen < 6) 5571 return 0; 5572 5573 if (!enable_rc6) 5574 return 0; 5575 5576 if (IS_GEN9_LP(dev_priv) && !bxt_check_bios_rc6_setup(dev_priv)) { 5577 DRM_INFO("RC6 disabled by BIOS\n"); 5578 return 0; 5579 } 5580 5581 /* Respect the kernel parameter if it is set */ 5582 if (enable_rc6 >= 0) { 5583 int mask; 5584 5585 if (HAS_RC6p(dev_priv)) 5586 mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | 5587 INTEL_RC6pp_ENABLE; 5588 else 5589 mask = INTEL_RC6_ENABLE; 5590 5591 if ((enable_rc6 & mask) != enable_rc6) 5592 DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d " 5593 "(requested %d, valid %d)\n", 5594 enable_rc6 & mask, enable_rc6, mask); 5595 5596 return enable_rc6 & mask; 5597 } 5598 5599 if (IS_IVYBRIDGE(dev_priv)) 5600 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); 5601 5602 return INTEL_RC6_ENABLE; 5603 } 5604 5605 static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) 5606 { 5607 /* All of these values are in units of 50MHz */ 5608 5609 /* static values from HW: RP0 > RP1 > RPn (min_freq) */ 5610 if (IS_GEN9_LP(dev_priv)) { 5611 u32 rp_state_cap = I915_READ(BXT_RP_STATE_CAP); 5612 dev_priv->rps.rp0_freq = (rp_state_cap >> 16) & 0xff; 5613 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5614 dev_priv->rps.min_freq = (rp_state_cap >> 0) & 0xff; 5615 } else { 5616 u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); 5617 dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; 5618 dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; 5619 dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; 5620 } 5621 /* hw_max = RP0 until we check for overclocking */ 5622 dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; 5623 5624 dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; 5625 if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || 5626 IS_GEN9_BC(dev_priv)) { 5627 u32 ddcc_status = 0; 5628 5629 if (sandybridge_pcode_read(dev_priv, 5630 HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, 5631 &ddcc_status) == 0) 5632 dev_priv->rps.efficient_freq = 5633 clamp_t(u8, 5634 ((ddcc_status >> 8) & 0xff), 5635 dev_priv->rps.min_freq, 5636 dev_priv->rps.max_freq); 5637 } 5638 5639 if (IS_GEN9_BC(dev_priv)) { 5640 /* Store the frequency values in 16.66 MHZ units, which is 5641 * the natural hardware unit for SKL 5642 */ 5643 dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER; 5644 dev_priv->rps.rp1_freq *= GEN9_FREQ_SCALER; 5645 dev_priv->rps.min_freq *= GEN9_FREQ_SCALER; 5646 dev_priv->rps.max_freq *= GEN9_FREQ_SCALER; 5647 dev_priv->rps.efficient_freq *= GEN9_FREQ_SCALER; 5648 } 5649 } 5650 5651 static void reset_rps(struct drm_i915_private *dev_priv, 5652 int (*set)(struct drm_i915_private *, u8)) 5653 { 5654 u8 freq = dev_priv->rps.cur_freq; 5655 5656 /* force a reset */ 5657 dev_priv->rps.power = -1; 5658 dev_priv->rps.cur_freq = -1; 5659 5660 if (set(dev_priv, freq)) 5661 DRM_ERROR("Failed to reset RPS to initial values\n"); 5662 } 5663 5664 /* See the Gen9_GT_PM_Programming_Guide doc for the below */ 5665 static void gen9_enable_rps(struct drm_i915_private *dev_priv) 5666 { 5667 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5668 5669 /* Program defaults and thresholds for RPS*/ 5670 I915_WRITE(GEN6_RC_VIDEO_FREQ, 5671 GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); 5672 5673 /* 1 second timeout*/ 5674 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 5675 GT_INTERVAL_FROM_US(dev_priv, 1000000)); 5676 5677 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa); 5678 5679 /* Leaning on the below call to gen6_set_rps to program/setup the 5680 * Up/Down EI & threshold registers, as well as the RP_CONTROL, 5681 * 
RP_INTERRUPT_LIMITS & RPNSWREQ registers */ 5682 reset_rps(dev_priv, gen6_set_rps); 5683 5684 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5685 } 5686 5687 static void gen9_enable_rc6(struct drm_i915_private *dev_priv) 5688 { 5689 struct intel_engine_cs *engine; 5690 enum intel_engine_id id; 5691 uint32_t rc6_mask = 0; 5692 5693 /* 1a: Software RC state - RC0 */ 5694 I915_WRITE(GEN6_RC_STATE, 0); 5695 5696 /* 1b: Get forcewake during program sequence. Although the driver 5697 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5698 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5699 5700 /* 2a: Disable RC states. */ 5701 I915_WRITE(GEN6_RC_CONTROL, 0); 5702 5703 /* 2b: Program RC6 thresholds.*/ 5704 5705 /* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */ 5706 if (IS_SKYLAKE(dev_priv)) 5707 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); 5708 else 5709 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); 5710 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5711 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5712 for_each_engine(engine, dev_priv, id) 5713 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5714 5715 if (HAS_GUC(dev_priv)) 5716 I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); 5717 5718 I915_WRITE(GEN6_RC_SLEEP, 0); 5719 5720 /* 2c: Program Coarse Power Gating Policies. */ 5721 I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25); 5722 I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); 5723 5724 /* 3a: Enable RC6 */ 5725 if (intel_enable_rc6() & INTEL_RC6_ENABLE) 5726 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 5727 DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); 5728 I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ 5729 I915_WRITE(GEN6_RC_CONTROL, 5730 GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); 5731 5732 /* 5733 * 3b: Enable Coarse Power Gating only when RC6 is enabled. 5734 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6. 5735 */ 5736 if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) 5737 I915_WRITE(GEN9_PG_ENABLE, 0); 5738 else 5739 I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 5740 (GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE) : 0); 5741 5742 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5743 } 5744 5745 static void gen8_enable_rps(struct drm_i915_private *dev_priv) 5746 { 5747 struct intel_engine_cs *engine; 5748 enum intel_engine_id id; 5749 uint32_t rc6_mask = 0; 5750 5751 /* 1a: Software RC state - RC0 */ 5752 I915_WRITE(GEN6_RC_STATE, 0); 5753 5754 /* 1c & 1d: Get forcewake during program sequence. Although the driver 5755 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 5756 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5757 5758 /* 2a: Disable RC states. 
*/ 5759 I915_WRITE(GEN6_RC_CONTROL, 0); 5760 5761 /* 2b: Program RC6 thresholds.*/ 5762 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 5763 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 5764 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 5765 for_each_engine(engine, dev_priv, id) 5766 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5767 I915_WRITE(GEN6_RC_SLEEP, 0); 5768 if (IS_BROADWELL(dev_priv)) 5769 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ 5770 else 5771 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ 5772 5773 /* 3: Enable RC6 */ 5774 if (intel_enable_rc6() & INTEL_RC6_ENABLE) 5775 rc6_mask = GEN6_RC_CTL_RC6_ENABLE; 5776 intel_print_rc6_info(dev_priv, rc6_mask); 5777 if (IS_BROADWELL(dev_priv)) 5778 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 5779 GEN7_RC_CTL_TO_MODE | 5780 rc6_mask); 5781 else 5782 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | 5783 GEN6_RC_CTL_EI_MODE(1) | 5784 rc6_mask); 5785 5786 /* 4 Program defaults and thresholds for RPS*/ 5787 I915_WRITE(GEN6_RPNSWREQ, 5788 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 5789 I915_WRITE(GEN6_RC_VIDEO_FREQ, 5790 HSW_FREQUENCY(dev_priv->rps.rp1_freq)); 5791 /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ 5792 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ 5793 5794 /* Docs recommend 900MHz, and 300 MHz respectively */ 5795 I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 5796 dev_priv->rps.max_freq_softlimit << 24 | 5797 dev_priv->rps.min_freq_softlimit << 16); 5798 5799 I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ 5800 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ 5801 I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ 5802 I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ 5803 5804 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5805 5806 /* 5: Enable RPS */ 5807 I915_WRITE(GEN6_RP_CONTROL, 5808 GEN6_RP_MEDIA_TURBO | 5809 GEN6_RP_MEDIA_HW_NORMAL_MODE | 5810 GEN6_RP_MEDIA_IS_GFX | 5811 GEN6_RP_ENABLE | 5812 GEN6_RP_UP_BUSY_AVG | 5813 GEN6_RP_DOWN_IDLE_AVG); 5814 5815 /* 6: Ring frequency + overclocking (our driver does this later */ 5816 5817 reset_rps(dev_priv, gen6_set_rps); 5818 5819 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5820 } 5821 5822 static void gen6_enable_rps(struct drm_i915_private *dev_priv) 5823 { 5824 struct intel_engine_cs *engine; 5825 enum intel_engine_id id; 5826 u32 rc6vids, rc6_mask = 0; 5827 u32 gtfifodbg; 5828 int rc6_mode; 5829 int ret; 5830 5831 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5832 5833 /* Here begins a magic sequence of register writes to enable 5834 * auto-downclocking. 5835 * 5836 * Perhaps there might be some value in exposing these to 5837 * userspace... 
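 *
 * As a rough outline of what follows: clear any stale GTFIFODBG error,
 * disable RC control, program the wake-rate limits and per-engine idle
 * thresholds, select the RC6 states permitted by intel_enable_rc6(),
 * write GEN6_RC_CONTROL, and finally reset_rps() to program the RPS
 * side.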
5838 */ 5839 I915_WRITE(GEN6_RC_STATE, 0); 5840 5841 /* Clear the DBG now so we don't confuse earlier errors */ 5842 gtfifodbg = I915_READ(GTFIFODBG); 5843 if (gtfifodbg) { 5844 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); 5845 I915_WRITE(GTFIFODBG, gtfifodbg); 5846 } 5847 5848 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 5849 5850 /* disable the counters and set deterministic thresholds */ 5851 I915_WRITE(GEN6_RC_CONTROL, 0); 5852 5853 I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); 5854 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); 5855 I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); 5856 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 5857 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 5858 5859 for_each_engine(engine, dev_priv, id) 5860 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 5861 5862 I915_WRITE(GEN6_RC_SLEEP, 0); 5863 I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); 5864 if (IS_IVYBRIDGE(dev_priv)) 5865 I915_WRITE(GEN6_RC6_THRESHOLD, 125000); 5866 else 5867 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); 5868 I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); 5869 I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ 5870 5871 /* Check if we are enabling RC6 */ 5872 rc6_mode = intel_enable_rc6(); 5873 if (rc6_mode & INTEL_RC6_ENABLE) 5874 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; 5875 5876 /* We don't use those on Haswell */ 5877 if (!IS_HASWELL(dev_priv)) { 5878 if (rc6_mode & INTEL_RC6p_ENABLE) 5879 rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; 5880 5881 if (rc6_mode & INTEL_RC6pp_ENABLE) 5882 rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; 5883 } 5884 5885 intel_print_rc6_info(dev_priv, rc6_mask); 5886 5887 I915_WRITE(GEN6_RC_CONTROL, 5888 rc6_mask | 5889 GEN6_RC_CTL_EI_MODE(1) | 5890 GEN6_RC_CTL_HW_ENABLE); 5891 5892 /* Power down if completely idle for over 50ms */ 5893 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); 5894 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 5895 5896 reset_rps(dev_priv, gen6_set_rps); 5897 5898 rc6vids = 0; 5899 ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); 5900 if (IS_GEN6(dev_priv) && ret) { 5901 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); 5902 } else if (IS_GEN6(dev_priv) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { 5903 DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", 5904 GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); 5905 rc6vids &= 0xffff00; 5906 rc6vids |= GEN6_ENCODE_RC6_VID(450); 5907 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); 5908 if (ret) 5909 DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); 5910 } 5911 5912 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 5913 } 5914 5915 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) 5916 { 5917 int min_freq = 15; 5918 unsigned int gpu_freq; 5919 unsigned int max_ia_freq, min_ring_freq; 5920 unsigned int max_gpu_freq, min_gpu_freq; 5921 int scaling_factor = 180; 5922 5923 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 5924 5925 #if 0 5926 policy = cpufreq_cpu_get(0); 5927 if (policy) { 5928 max_ia_freq = policy->cpuinfo.max_freq; 5929 cpufreq_cpu_put(policy); 5930 } else { 5931 /* 5932 * Default to measured freq if none found, PCU will ensure we 5933 * don't go over 5934 */ 5935 max_ia_freq = tsc_khz; 5936 } 5937 #else 5938 max_ia_freq = tsc_frequency / 1000; 5939 #endif 5940 5941 /* Convert from kHz to MHz */ 5942 max_ia_freq /= 1000; 5943 5944 min_ring_freq = I915_READ(DCLK) & 0xf; 5945 /* convert DDR frequency from units of 266.6MHz to bandwidth */ 5946 min_ring_freq = mult_frac(min_ring_freq, 8, 3); 5947 5948 if (IS_GEN9_BC(dev_priv)) { 5949 /* Convert GT frequency to 50 HZ units */ 5950 min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; 5951 max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; 5952 } else { 5953 min_gpu_freq = dev_priv->rps.min_freq; 5954 max_gpu_freq = dev_priv->rps.max_freq; 5955 } 5956 5957 /* 5958 * For each potential GPU frequency, load a ring frequency we'd like 5959 * to use for memory access. We do this by specifying the IA frequency 5960 * the PCU should use as a reference to determine the ring frequency. 5961 */ 5962 for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { 5963 int diff = max_gpu_freq - gpu_freq; 5964 unsigned int ia_freq = 0, ring_freq = 0; 5965 5966 if (IS_GEN9_BC(dev_priv)) { 5967 /* 5968 * ring_freq = 2 * GT. ring_freq is in 100MHz units 5969 * No floor required for ring frequency on SKL. 5970 */ 5971 ring_freq = gpu_freq; 5972 } else if (INTEL_INFO(dev_priv)->gen >= 8) { 5973 /* max(2 * GT, DDR). NB: GT is 50MHz units */ 5974 ring_freq = max(min_ring_freq, gpu_freq); 5975 } else if (IS_HASWELL(dev_priv)) { 5976 ring_freq = mult_frac(gpu_freq, 5, 4); 5977 ring_freq = max(min_ring_freq, ring_freq); 5978 /* leave ia_freq as the default, chosen by cpufreq */ 5979 } else { 5980 /* On older processors, there is no separate ring 5981 * clock domain, so in order to boost the bandwidth 5982 * of the ring, we need to upclock the CPU (ia_freq). 5983 * 5984 * For GPU frequencies less than 750MHz, 5985 * just use the lowest ring freq. 
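 *
 * A worked example (illustrative numbers, not from any spec): with
 * max_ia_freq = 3400 MHz, scaling_factor = 180 and diff = 4, the else
 * branch below yields 3400 - (4 * 180) / 2 = 3040 MHz, which
 * DIV_ROUND_CLOSEST(3040, 100) encodes as 30, i.e. a 3.0 GHz IA
 * request in 100 MHz units.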
5986 */ 5987 if (gpu_freq < min_freq) 5988 ia_freq = 800; 5989 else 5990 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); 5991 ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); 5992 } 5993 5994 sandybridge_pcode_write(dev_priv, 5995 GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 5996 ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | 5997 ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | 5998 gpu_freq); 5999 } 6000 } 6001 6002 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) 6003 { 6004 u32 val, rp0; 6005 6006 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 6007 6008 switch (INTEL_INFO(dev_priv)->sseu.eu_total) { 6009 case 8: 6010 /* (2 * 4) config */ 6011 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT); 6012 break; 6013 case 12: 6014 /* (2 * 6) config */ 6015 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT); 6016 break; 6017 case 16: 6018 /* (2 * 8) config */ 6019 default: 6020 /* Setting (2 * 8) Min RP0 for any other combination */ 6021 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT); 6022 break; 6023 } 6024 6025 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK); 6026 6027 return rp0; 6028 } 6029 6030 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) 6031 { 6032 u32 val, rpe; 6033 6034 val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); 6035 rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; 6036 6037 return rpe; 6038 } 6039 6040 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) 6041 { 6042 u32 val, rp1; 6043 6044 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE); 6045 rp1 = (val & FB_GFX_FREQ_FUSE_MASK); 6046 6047 return rp1; 6048 } 6049 6050 static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) 6051 { 6052 u32 val, rpn; 6053 6054 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); 6055 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & 6056 FB_GFX_FREQ_FUSE_MASK); 6057 6058 return rpn; 6059 } 6060 6061 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) 6062 { 6063 u32 val, rp1; 6064 6065 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 6066 6067 rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; 6068 6069 return rp1; 6070 } 6071 6072 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) 6073 { 6074 u32 val, rp0; 6075 6076 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); 6077 6078 rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; 6079 /* Clamp to max */ 6080 rp0 = min_t(u32, rp0, 0xea); 6081 6082 return rp0; 6083 } 6084 6085 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) 6086 { 6087 u32 val, rpe; 6088 6089 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); 6090 rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; 6091 val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); 6092 rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; 6093 6094 return rpe; 6095 } 6096 6097 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) 6098 { 6099 u32 val; 6100 6101 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; 6102 /* 6103 * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value 6104 * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on 6105 * a BYT-M B0 the above register contains 0xbf. Moreover when setting 6106 * a frequency Punit will not allow values below 0xc0. Clamp it 0xc0 6107 * to make sure it matches what Punit accepts. 
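 *
 * For example (illustrative): a BYT-M B0 part reporting 0xbf here is
 * raised to 0xc0 by the clamp below, matching the lowest request the
 * Punit will actually accept in GPLL mode.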
 */
	return max_t(u32, val, 0xc0);
}

/* Check that the pctx buffer wasn't moved under us. */
static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
{
	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;

	WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
			     dev_priv->vlv_pctx->stolen->start);
}


/* Check that the pcbr address is not empty. */
static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
{
	unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;

	WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
}

static void cherryview_setup_pctx(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned long pctx_paddr, paddr;
	u32 pcbr;
	int pctx_size = 32*1024;

	pcbr = I915_READ(VLV_PCBR);
	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
		paddr = (dev_priv->mm.stolen_base +
			 (ggtt->stolen_size - pctx_size));

		pctx_paddr = (paddr & (~4095));
		I915_WRITE(VLV_PCBR, pctx_paddr);
	}

	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
}

static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *pctx;
	unsigned long pctx_paddr;
	u32 pcbr;
	int pctx_size = 24*1024;

	pcbr = I915_READ(VLV_PCBR);
	if (pcbr) {
		/* BIOS set it up already, grab the pre-alloc'd space */
		int pcbr_offset;

		pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv,
								      pcbr_offset,
								      I915_GTT_OFFSET_NONE,
								      pctx_size);
		goto out;
	}

	DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");

	/*
	 * From the Gunit register HAS:
	 * The Gfx driver is expected to program this register and ensure
	 * proper allocation within Gfx stolen memory. For example, this
	 * register should be programmed such that the PCBR range does not
	 * overlap with other ranges, such as the frame buffer, protected
	 * memory, or any other relevant ranges.
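	 *
	 * As an illustration, the CHV path above satisfies this by carving
	 * its 32 KiB context out of the very top of stolen memory
	 * (stolen_base + stolen_size - pctx_size) and writing only the
	 * 4 KiB-aligned base address to VLV_PCBR.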
6179 */ 6180 pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); 6181 if (!pctx) { 6182 DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); 6183 goto out; 6184 } 6185 6186 pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; 6187 I915_WRITE(VLV_PCBR, pctx_paddr); 6188 6189 out: 6190 DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); 6191 dev_priv->vlv_pctx = pctx; 6192 } 6193 6194 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) 6195 { 6196 if (WARN_ON(!dev_priv->vlv_pctx)) 6197 return; 6198 6199 i915_gem_object_put(dev_priv->vlv_pctx); 6200 dev_priv->vlv_pctx = NULL; 6201 } 6202 6203 static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) 6204 { 6205 dev_priv->rps.gpll_ref_freq = 6206 vlv_get_cck_clock(dev_priv, "GPLL ref", 6207 CCK_GPLL_CLOCK_CONTROL, 6208 dev_priv->czclk_freq); 6209 6210 DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", 6211 dev_priv->rps.gpll_ref_freq); 6212 } 6213 6214 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) 6215 { 6216 u32 val; 6217 6218 valleyview_setup_pctx(dev_priv); 6219 6220 vlv_init_gpll_ref_freq(dev_priv); 6221 6222 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 6223 switch ((val >> 6) & 3) { 6224 case 0: 6225 case 1: 6226 dev_priv->mem_freq = 800; 6227 break; 6228 case 2: 6229 dev_priv->mem_freq = 1066; 6230 break; 6231 case 3: 6232 dev_priv->mem_freq = 1333; 6233 break; 6234 } 6235 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 6236 6237 dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); 6238 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 6239 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 6240 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 6241 dev_priv->rps.max_freq); 6242 6243 dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); 6244 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 6245 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 6246 dev_priv->rps.efficient_freq); 6247 6248 dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); 6249 DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", 6250 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 6251 dev_priv->rps.rp1_freq); 6252 6253 dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); 6254 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 6255 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 6256 dev_priv->rps.min_freq); 6257 } 6258 6259 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) 6260 { 6261 u32 val; 6262 6263 cherryview_setup_pctx(dev_priv); 6264 6265 vlv_init_gpll_ref_freq(dev_priv); 6266 6267 mutex_lock(&dev_priv->sb_lock); 6268 val = vlv_cck_read(dev_priv, CCK_FUSE_REG); 6269 mutex_unlock(&dev_priv->sb_lock); 6270 6271 switch ((val >> 2) & 0x7) { 6272 case 3: 6273 dev_priv->mem_freq = 2000; 6274 break; 6275 default: 6276 dev_priv->mem_freq = 1600; 6277 break; 6278 } 6279 DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); 6280 6281 dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); 6282 dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; 6283 DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", 6284 intel_gpu_freq(dev_priv, dev_priv->rps.max_freq), 6285 dev_priv->rps.max_freq); 6286 6287 dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); 6288 DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", 6289 intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), 6290 dev_priv->rps.efficient_freq); 6291 6292 dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); 6293 
DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", 6294 intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), 6295 dev_priv->rps.rp1_freq); 6296 6297 dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); 6298 DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", 6299 intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), 6300 dev_priv->rps.min_freq); 6301 6302 WARN_ONCE((dev_priv->rps.max_freq | 6303 dev_priv->rps.efficient_freq | 6304 dev_priv->rps.rp1_freq | 6305 dev_priv->rps.min_freq) & 1, 6306 "Odd GPU freq values\n"); 6307 } 6308 6309 static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) 6310 { 6311 valleyview_cleanup_pctx(dev_priv); 6312 } 6313 6314 static void cherryview_enable_rps(struct drm_i915_private *dev_priv) 6315 { 6316 struct intel_engine_cs *engine; 6317 enum intel_engine_id id; 6318 u32 gtfifodbg, val, rc6_mode = 0, pcbr; 6319 6320 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6321 6322 gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | 6323 GT_FIFO_FREE_ENTRIES_CHV); 6324 if (gtfifodbg) { 6325 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 6326 gtfifodbg); 6327 I915_WRITE(GTFIFODBG, gtfifodbg); 6328 } 6329 6330 cherryview_check_pctx(dev_priv); 6331 6332 /* 1a & 1b: Get forcewake during program sequence. Although the driver 6333 * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ 6334 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6335 6336 /* Disable RC states. */ 6337 I915_WRITE(GEN6_RC_CONTROL, 0); 6338 6339 /* 2a: Program RC6 thresholds.*/ 6340 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); 6341 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ 6342 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ 6343 6344 for_each_engine(engine, dev_priv, id) 6345 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6346 I915_WRITE(GEN6_RC_SLEEP, 0); 6347 6348 /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ 6349 I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); 6350 6351 /* allows RC6 residency counter to work */ 6352 I915_WRITE(VLV_COUNTER_CONTROL, 6353 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 6354 VLV_MEDIA_RC6_COUNT_EN | 6355 VLV_RENDER_RC6_COUNT_EN)); 6356 6357 /* For now we assume BIOS is allocating and populating the PCBR */ 6358 pcbr = I915_READ(VLV_PCBR); 6359 6360 /* 3: Enable RC6 */ 6361 if ((intel_enable_rc6() & INTEL_RC6_ENABLE) && 6362 (pcbr >> VLV_PCBR_ADDR_SHIFT)) 6363 rc6_mode = GEN7_RC_CTL_TO_MODE; 6364 6365 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 6366 6367 /* 4 Program defaults and thresholds for RPS*/ 6368 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 6369 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 6370 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 6371 I915_WRITE(GEN6_RP_UP_EI, 66000); 6372 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 6373 6374 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6375 6376 /* 5: Enable RPS */ 6377 I915_WRITE(GEN6_RP_CONTROL, 6378 GEN6_RP_MEDIA_HW_NORMAL_MODE | 6379 GEN6_RP_MEDIA_IS_GFX | 6380 GEN6_RP_ENABLE | 6381 GEN6_RP_UP_BUSY_AVG | 6382 GEN6_RP_DOWN_IDLE_AVG); 6383 6384 /* Setting Fixed Bias */ 6385 val = VLV_OVERRIDE_EN | 6386 VLV_SOC_TDP_EN | 6387 CHV_BIAS_CPU_50_SOC_50; 6388 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 6389 6390 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 6391 6392 /* RPS code assumes GPLL is used */ 6393 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 6394 6395 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 6396 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 6397 6398 reset_rps(dev_priv, valleyview_set_rps); 6399 6400 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6401 } 6402 6403 static void valleyview_enable_rps(struct drm_i915_private *dev_priv) 6404 { 6405 struct intel_engine_cs *engine; 6406 enum intel_engine_id id; 6407 u32 gtfifodbg, val, rc6_mode = 0; 6408 6409 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 6410 6411 valleyview_check_pctx(dev_priv); 6412 6413 gtfifodbg = I915_READ(GTFIFODBG); 6414 if (gtfifodbg) { 6415 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", 6416 gtfifodbg); 6417 I915_WRITE(GTFIFODBG, gtfifodbg); 6418 } 6419 6420 /* If VLV, Forcewake all wells, else re-direct to regular path */ 6421 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); 6422 6423 /* Disable RC states. */ 6424 I915_WRITE(GEN6_RC_CONTROL, 0); 6425 6426 I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); 6427 I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); 6428 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); 6429 I915_WRITE(GEN6_RP_UP_EI, 66000); 6430 I915_WRITE(GEN6_RP_DOWN_EI, 350000); 6431 6432 I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); 6433 6434 I915_WRITE(GEN6_RP_CONTROL, 6435 GEN6_RP_MEDIA_TURBO | 6436 GEN6_RP_MEDIA_HW_NORMAL_MODE | 6437 GEN6_RP_MEDIA_IS_GFX | 6438 GEN6_RP_ENABLE | 6439 GEN6_RP_UP_BUSY_AVG | 6440 GEN6_RP_DOWN_IDLE_CONT); 6441 6442 I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); 6443 I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); 6444 I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); 6445 6446 for_each_engine(engine, dev_priv, id) 6447 I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); 6448 6449 I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); 6450 6451 /* allows RC6 residency counter to work */ 6452 I915_WRITE(VLV_COUNTER_CONTROL, 6453 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | 6454 VLV_MEDIA_RC0_COUNT_EN | 6455 VLV_RENDER_RC0_COUNT_EN | 6456 VLV_MEDIA_RC6_COUNT_EN | 6457 VLV_RENDER_RC6_COUNT_EN)); 6458 6459 if (intel_enable_rc6() & INTEL_RC6_ENABLE) 6460 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; 6461 6462 intel_print_rc6_info(dev_priv, rc6_mode); 6463 6464 I915_WRITE(GEN6_RC_CONTROL, rc6_mode); 6465 6466 /* Setting Fixed Bias */ 6467 val = VLV_OVERRIDE_EN | 6468 VLV_SOC_TDP_EN | 6469 VLV_BIAS_CPU_125_SOC_875; 6470 vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val); 6471 6472 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); 6473 6474 /* RPS code assumes GPLL is used */ 6475 WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); 6476 6477 DRM_DEBUG_DRIVER("GPLL enabled? 
%s\n", yesno(val & GPLLENABLE)); 6478 DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); 6479 6480 reset_rps(dev_priv, valleyview_set_rps); 6481 6482 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); 6483 } 6484 6485 static unsigned long intel_pxfreq(u32 vidfreq) 6486 { 6487 unsigned long freq; 6488 int div = (vidfreq & 0x3f0000) >> 16; 6489 int post = (vidfreq & 0x3000) >> 12; 6490 int pre = (vidfreq & 0x7); 6491 6492 if (!pre) 6493 return 0; 6494 6495 freq = ((div * 133333) / ((1<<post) * pre)); 6496 6497 return freq; 6498 } 6499 6500 static const struct cparams { 6501 u16 i; 6502 u16 t; 6503 u16 m; 6504 u16 c; 6505 } cparams[] = { 6506 { 1, 1333, 301, 28664 }, 6507 { 1, 1066, 294, 24460 }, 6508 { 1, 800, 294, 25192 }, 6509 { 0, 1333, 276, 27605 }, 6510 { 0, 1066, 276, 27605 }, 6511 { 0, 800, 231, 23784 }, 6512 }; 6513 6514 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) 6515 { 6516 u64 total_count, diff, ret; 6517 u32 count1, count2, count3, m = 0, c = 0; 6518 unsigned long now = jiffies_to_msecs(jiffies), diff1; 6519 int i; 6520 6521 lockdep_assert_held(&mchdev_lock); 6522 6523 diff1 = now - dev_priv->ips.last_time1; 6524 6525 /* Prevent division-by-zero if we are asking too fast. 6526 * Also, we don't get interesting results if we are polling 6527 * faster than once in 10ms, so just return the saved value 6528 * in such cases. 6529 */ 6530 if (diff1 <= 10) 6531 return dev_priv->ips.chipset_power; 6532 6533 count1 = I915_READ(DMIEC); 6534 count2 = I915_READ(DDREC); 6535 count3 = I915_READ(CSIEC); 6536 6537 total_count = count1 + count2 + count3; 6538 6539 /* FIXME: handle per-counter overflow */ 6540 if (total_count < dev_priv->ips.last_count1) { 6541 diff = ~0UL - dev_priv->ips.last_count1; 6542 diff += total_count; 6543 } else { 6544 diff = total_count - dev_priv->ips.last_count1; 6545 } 6546 6547 for (i = 0; i < ARRAY_SIZE(cparams); i++) { 6548 if (cparams[i].i == dev_priv->ips.c_m && 6549 cparams[i].t == dev_priv->ips.r_t) { 6550 m = cparams[i].m; 6551 c = cparams[i].c; 6552 break; 6553 } 6554 } 6555 6556 diff = div_u64(diff, diff1); 6557 ret = ((m * diff) + c); 6558 ret = div_u64(ret, 10); 6559 6560 dev_priv->ips.last_count1 = total_count; 6561 dev_priv->ips.last_time1 = now; 6562 6563 dev_priv->ips.chipset_power = ret; 6564 6565 return ret; 6566 } 6567 6568 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) 6569 { 6570 unsigned long val; 6571 6572 if (INTEL_INFO(dev_priv)->gen != 5) 6573 return 0; 6574 6575 spin_lock_irq(&mchdev_lock); 6576 6577 val = __i915_chipset_val(dev_priv); 6578 6579 spin_unlock_irq(&mchdev_lock); 6580 6581 return val; 6582 } 6583 6584 unsigned long i915_mch_val(struct drm_i915_private *dev_priv) 6585 { 6586 unsigned long m, x, b; 6587 u32 tsfs; 6588 6589 tsfs = I915_READ(TSFS); 6590 6591 m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); 6592 x = I915_READ8(TR1); 6593 6594 b = tsfs & TSFS_INTR_MASK; 6595 6596 return ((m * x) / 127) - b; 6597 } 6598 6599 static int _pxvid_to_vd(u8 pxvid) 6600 { 6601 if (pxvid == 0) 6602 return 0; 6603 6604 if (pxvid >= 8 && pxvid < 31) 6605 pxvid = 31; 6606 6607 return (pxvid + 2) * 125; 6608 } 6609 6610 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) 6611 { 6612 const int vd = _pxvid_to_vd(pxvid); 6613 const int vm = vd - 1125; 6614 6615 if (INTEL_INFO(dev_priv)->is_mobile) 6616 return vm > 0 ? 
vm : 0; 6617 6618 return vd; 6619 } 6620 6621 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) 6622 { 6623 u64 now, diff, diffms; 6624 u32 count; 6625 6626 lockdep_assert_held(&mchdev_lock); 6627 6628 now = ktime_get_raw_ns(); 6629 diffms = now - dev_priv->ips.last_time2; 6630 do_div(diffms, NSEC_PER_MSEC); 6631 6632 /* Don't divide by 0 */ 6633 if (!diffms) 6634 return; 6635 6636 count = I915_READ(GFXEC); 6637 6638 if (count < dev_priv->ips.last_count2) { 6639 diff = ~0UL - dev_priv->ips.last_count2; 6640 diff += count; 6641 } else { 6642 diff = count - dev_priv->ips.last_count2; 6643 } 6644 6645 dev_priv->ips.last_count2 = count; 6646 dev_priv->ips.last_time2 = now; 6647 6648 /* More magic constants... */ 6649 diff = diff * 1181; 6650 diff = div_u64(diff, diffms * 10); 6651 dev_priv->ips.gfx_power = diff; 6652 } 6653 6654 void i915_update_gfx_val(struct drm_i915_private *dev_priv) 6655 { 6656 if (INTEL_INFO(dev_priv)->gen != 5) 6657 return; 6658 6659 spin_lock_irq(&mchdev_lock); 6660 6661 __i915_update_gfx_val(dev_priv); 6662 6663 spin_unlock_irq(&mchdev_lock); 6664 } 6665 6666 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) 6667 { 6668 unsigned long t, corr, state1, corr2, state2; 6669 u32 pxvid, ext_v; 6670 6671 lockdep_assert_held(&mchdev_lock); 6672 6673 pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); 6674 pxvid = (pxvid >> 24) & 0x7f; 6675 ext_v = pvid_to_extvid(dev_priv, pxvid); 6676 6677 state1 = ext_v; 6678 6679 t = i915_mch_val(dev_priv); 6680 6681 /* Revel in the empirically derived constants */ 6682 6683 /* Correction factor in 1/100000 units */ 6684 if (t > 80) 6685 corr = ((t * 2349) + 135940); 6686 else if (t >= 50) 6687 corr = ((t * 964) + 29317); 6688 else /* < 50 */ 6689 corr = ((t * 301) + 1004); 6690 6691 corr = corr * ((150142 * state1) / 10000 - 78642); 6692 corr /= 100000; 6693 corr2 = (corr * dev_priv->ips.corr); 6694 6695 state2 = (corr2 * state1) / 10000; 6696 state2 /= 100; /* convert to mW */ 6697 6698 __i915_update_gfx_val(dev_priv); 6699 6700 return dev_priv->ips.gfx_power + state2; 6701 } 6702 6703 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) 6704 { 6705 unsigned long val; 6706 6707 if (INTEL_INFO(dev_priv)->gen != 5) 6708 return 0; 6709 6710 spin_lock_irq(&mchdev_lock); 6711 6712 val = __i915_gfx_val(dev_priv); 6713 6714 spin_unlock_irq(&mchdev_lock); 6715 6716 return val; 6717 } 6718 6719 /** 6720 * i915_read_mch_val - return value for IPS use 6721 * 6722 * Calculate and return a value for the IPS driver to use when deciding whether 6723 * we have thermal and power headroom to increase CPU or GPU power budget. 6724 */ 6725 unsigned long i915_read_mch_val(void) 6726 { 6727 struct drm_i915_private *dev_priv; 6728 unsigned long chipset_val, graphics_val, ret = 0; 6729 6730 spin_lock_irq(&mchdev_lock); 6731 if (!i915_mch_dev) 6732 goto out_unlock; 6733 dev_priv = i915_mch_dev; 6734 6735 chipset_val = __i915_chipset_val(dev_priv); 6736 graphics_val = __i915_gfx_val(dev_priv); 6737 6738 ret = chipset_val + graphics_val; 6739 6740 out_unlock: 6741 spin_unlock_irq(&mchdev_lock); 6742 6743 return ret; 6744 } 6745 EXPORT_SYMBOL_GPL(i915_read_mch_val); 6746 6747 /** 6748 * i915_gpu_raise - raise GPU frequency limit 6749 * 6750 * Raise the limit; IPS indicates we have thermal headroom. 
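 *
 * Note that max_delay counts down as frequency goes up (fmax is the
 * lowest permitted value), so raising the limit below means
 * decrementing max_delay towards ips.fmax.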
6751 */ 6752 bool i915_gpu_raise(void) 6753 { 6754 struct drm_i915_private *dev_priv; 6755 bool ret = true; 6756 6757 spin_lock_irq(&mchdev_lock); 6758 if (!i915_mch_dev) { 6759 ret = false; 6760 goto out_unlock; 6761 } 6762 dev_priv = i915_mch_dev; 6763 6764 if (dev_priv->ips.max_delay > dev_priv->ips.fmax) 6765 dev_priv->ips.max_delay--; 6766 6767 out_unlock: 6768 spin_unlock_irq(&mchdev_lock); 6769 6770 return ret; 6771 } 6772 EXPORT_SYMBOL_GPL(i915_gpu_raise); 6773 6774 /** 6775 * i915_gpu_lower - lower GPU frequency limit 6776 * 6777 * IPS indicates we're close to a thermal limit, so throttle back the GPU 6778 * frequency maximum. 6779 */ 6780 bool i915_gpu_lower(void) 6781 { 6782 struct drm_i915_private *dev_priv; 6783 bool ret = true; 6784 6785 spin_lock_irq(&mchdev_lock); 6786 if (!i915_mch_dev) { 6787 ret = false; 6788 goto out_unlock; 6789 } 6790 dev_priv = i915_mch_dev; 6791 6792 if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) 6793 dev_priv->ips.max_delay++; 6794 6795 out_unlock: 6796 spin_unlock_irq(&mchdev_lock); 6797 6798 return ret; 6799 } 6800 EXPORT_SYMBOL_GPL(i915_gpu_lower); 6801 6802 /** 6803 * i915_gpu_busy - indicate GPU business to IPS 6804 * 6805 * Tell the IPS driver whether or not the GPU is busy. 6806 */ 6807 bool i915_gpu_busy(void) 6808 { 6809 bool ret = false; 6810 6811 spin_lock_irq(&mchdev_lock); 6812 if (i915_mch_dev) 6813 ret = i915_mch_dev->gt.awake; 6814 spin_unlock_irq(&mchdev_lock); 6815 6816 return ret; 6817 } 6818 EXPORT_SYMBOL_GPL(i915_gpu_busy); 6819 6820 /** 6821 * i915_gpu_turbo_disable - disable graphics turbo 6822 * 6823 * Disable graphics turbo by resetting the max frequency and setting the 6824 * current frequency to the default. 6825 */ 6826 bool i915_gpu_turbo_disable(void) 6827 { 6828 struct drm_i915_private *dev_priv; 6829 bool ret = true; 6830 6831 spin_lock_irq(&mchdev_lock); 6832 if (!i915_mch_dev) { 6833 ret = false; 6834 goto out_unlock; 6835 } 6836 dev_priv = i915_mch_dev; 6837 6838 dev_priv->ips.max_delay = dev_priv->ips.fstart; 6839 6840 if (!ironlake_set_drps(dev_priv, dev_priv->ips.fstart)) 6841 ret = false; 6842 6843 out_unlock: 6844 spin_unlock_irq(&mchdev_lock); 6845 6846 return ret; 6847 } 6848 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); 6849 6850 /** 6851 * Tells the intel_ips driver that the i915 driver is now loaded, if 6852 * IPS got loaded first. 6853 * 6854 * This awkward dance is so that neither module has to depend on the 6855 * other in order for IPS to do the appropriate communication of 6856 * GPU turbo limits to i915. 6857 */ 6858 static void 6859 ips_ping_for_i915_load(void) 6860 { 6861 #if 0 6862 void (*link)(void); 6863 6864 link = symbol_get(ips_link_to_i915_driver); 6865 if (link) { 6866 link(); 6867 symbol_put(ips_link_to_i915_driver); 6868 } 6869 #endif 6870 } 6871 6872 void intel_gpu_ips_init(struct drm_i915_private *dev_priv) 6873 { 6874 /* We only register the i915 ips part with intel-ips once everything is 6875 * set up, to avoid intel-ips sneaking in and reading bogus values. 
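 *
 * For example (illustrative): an early i915_read_mch_val() call from
 * intel-ips would otherwise walk the ips state before it has been
 * initialised and report a meaningless power figure.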
*/ 6876 spin_lock_irq(&mchdev_lock); 6877 i915_mch_dev = dev_priv; 6878 spin_unlock_irq(&mchdev_lock); 6879 6880 ips_ping_for_i915_load(); 6881 } 6882 6883 void intel_gpu_ips_teardown(void) 6884 { 6885 spin_lock_irq(&mchdev_lock); 6886 i915_mch_dev = NULL; 6887 spin_unlock_irq(&mchdev_lock); 6888 } 6889 6890 static void intel_init_emon(struct drm_i915_private *dev_priv) 6891 { 6892 u32 lcfuse; 6893 u8 pxw[16]; 6894 int i; 6895 6896 /* Disable to program */ 6897 I915_WRITE(ECR, 0); 6898 POSTING_READ(ECR); 6899 6900 /* Program energy weights for various events */ 6901 I915_WRITE(SDEW, 0x15040d00); 6902 I915_WRITE(CSIEW0, 0x007f0000); 6903 I915_WRITE(CSIEW1, 0x1e220004); 6904 I915_WRITE(CSIEW2, 0x04000004); 6905 6906 for (i = 0; i < 5; i++) 6907 I915_WRITE(PEW(i), 0); 6908 for (i = 0; i < 3; i++) 6909 I915_WRITE(DEW(i), 0); 6910 6911 /* Program P-state weights to account for frequency power adjustment */ 6912 for (i = 0; i < 16; i++) { 6913 u32 pxvidfreq = I915_READ(PXVFREQ(i)); 6914 unsigned long freq = intel_pxfreq(pxvidfreq); 6915 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> 6916 PXVFREQ_PX_SHIFT; 6917 unsigned long val; 6918 6919 val = vid * vid; 6920 val *= (freq / 1000); 6921 val *= 255; 6922 val /= (127*127*900); 6923 if (val > 0xff) 6924 DRM_ERROR("bad pxval: %ld\n", val); 6925 pxw[i] = val; 6926 } 6927 /* Render standby states get 0 weight */ 6928 pxw[14] = 0; 6929 pxw[15] = 0; 6930 6931 for (i = 0; i < 4; i++) { 6932 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | 6933 (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); 6934 I915_WRITE(PXW(i), val); 6935 } 6936 6937 /* Adjust magic regs to magic values (more experimental results) */ 6938 I915_WRITE(OGW0, 0); 6939 I915_WRITE(OGW1, 0); 6940 I915_WRITE(EG0, 0x00007f00); 6941 I915_WRITE(EG1, 0x0000000e); 6942 I915_WRITE(EG2, 0x000e0000); 6943 I915_WRITE(EG3, 0x68000300); 6944 I915_WRITE(EG4, 0x42000000); 6945 I915_WRITE(EG5, 0x00140031); 6946 I915_WRITE(EG6, 0); 6947 I915_WRITE(EG7, 0); 6948 6949 for (i = 0; i < 8; i++) 6950 I915_WRITE(PXWL(i), 0); 6951 6952 /* Enable PMON + select events */ 6953 I915_WRITE(ECR, 0x80000019); 6954 6955 lcfuse = I915_READ(LCFUSE02); 6956 6957 dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); 6958 } 6959 6960 void intel_init_gt_powersave(struct drm_i915_private *dev_priv) 6961 { 6962 /* 6963 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a 6964 * requirement. 
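 *
 * Concretely, when booted with i915.enable_rc6=0 the code below takes
 * a permanent runtime-PM reference, so the device simply never
 * runtime-suspends instead of suspending without a saved GT context.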
 */
	if (!i915.enable_rc6) {
		DRM_INFO("RC6 disabled, disabling runtime PM support\n");
		intel_runtime_pm_get(dev_priv);
	}

	mutex_lock(&dev_priv->drm.struct_mutex);
	mutex_lock(&dev_priv->rps.hw_lock);

	/* Initialize RPS limits (for userspace) */
	if (IS_CHERRYVIEW(dev_priv))
		cherryview_init_gt_powersave(dev_priv);
	else if (IS_VALLEYVIEW(dev_priv))
		valleyview_init_gt_powersave(dev_priv);
	else if (INTEL_GEN(dev_priv) >= 6)
		gen6_init_rps_frequencies(dev_priv);

	/* Derive initial user preferences/limits from the hardware limits */
	dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
	dev_priv->rps.cur_freq = dev_priv->rps.idle_freq;

	dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
	dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;

	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
		dev_priv->rps.min_freq_softlimit =
			max_t(int,
			      dev_priv->rps.efficient_freq,
			      intel_freq_opcode(dev_priv, 450));

	/* After setting max-softlimit, find the overclock max freq */
	if (IS_GEN6(dev_priv) ||
	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
		u32 params = 0;

		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
		if (params & BIT(31)) { /* OC supported */
			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
					 (dev_priv->rps.max_freq & 0xff) * 50,
					 (params & 0xff) * 50);
			dev_priv->rps.max_freq = params & 0xff;
		}
	}

	/* Finally allow us to boost to max by default */
	dev_priv->rps.boost_freq = dev_priv->rps.max_freq;

	mutex_unlock(&dev_priv->rps.hw_lock);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_autoenable_gt_powersave(dev_priv);
}

void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
{
	if (IS_VALLEYVIEW(dev_priv))
		valleyview_cleanup_gt_powersave(dev_priv);

	if (!i915.enable_rc6)
		intel_runtime_pm_put(dev_priv);
}

/**
 * intel_suspend_gt_powersave - suspend PM work and helper threads
 * @dev_priv: i915 device
 *
 * We don't want to disable RC6 or other features here, we just want
 * to make sure any work we've queued has finished and won't bother
 * us while we're suspended.
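 *
 * For example, the delayed rps.autoenable_work queued at load time is
 * cancelled below, and the runtime-PM reference it was holding is
 * dropped.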
7034 */ 7035 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv) 7036 { 7037 if (INTEL_GEN(dev_priv) < 6) 7038 return; 7039 7040 if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work)) 7041 intel_runtime_pm_put(dev_priv); 7042 7043 /* gen6_rps_idle() will be called later to disable interrupts */ 7044 } 7045 7046 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) 7047 { 7048 dev_priv->rps.enabled = true; /* force disabling */ 7049 intel_disable_gt_powersave(dev_priv); 7050 7051 gen6_reset_rps_interrupts(dev_priv); 7052 } 7053 7054 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) 7055 { 7056 if (!READ_ONCE(dev_priv->rps.enabled)) 7057 return; 7058 7059 mutex_lock(&dev_priv->rps.hw_lock); 7060 7061 if (INTEL_GEN(dev_priv) >= 9) { 7062 gen9_disable_rc6(dev_priv); 7063 gen9_disable_rps(dev_priv); 7064 } else if (IS_CHERRYVIEW(dev_priv)) { 7065 cherryview_disable_rps(dev_priv); 7066 } else if (IS_VALLEYVIEW(dev_priv)) { 7067 valleyview_disable_rps(dev_priv); 7068 } else if (INTEL_GEN(dev_priv) >= 6) { 7069 gen6_disable_rps(dev_priv); 7070 } else if (IS_IRONLAKE_M(dev_priv)) { 7071 ironlake_disable_drps(dev_priv); 7072 } 7073 7074 dev_priv->rps.enabled = false; 7075 mutex_unlock(&dev_priv->rps.hw_lock); 7076 } 7077 7078 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) 7079 { 7080 /* We shouldn't be disabling as we submit, so this should be less 7081 * racy than it appears! 7082 */ 7083 if (READ_ONCE(dev_priv->rps.enabled)) 7084 return; 7085 7086 /* Powersaving is controlled by the host when inside a VM */ 7087 if (intel_vgpu_active(dev_priv)) 7088 return; 7089 7090 mutex_lock(&dev_priv->rps.hw_lock); 7091 7092 if (IS_CHERRYVIEW(dev_priv)) { 7093 cherryview_enable_rps(dev_priv); 7094 } else if (IS_VALLEYVIEW(dev_priv)) { 7095 valleyview_enable_rps(dev_priv); 7096 } else if (INTEL_GEN(dev_priv) >= 9) { 7097 gen9_enable_rc6(dev_priv); 7098 gen9_enable_rps(dev_priv); 7099 if (IS_GEN9_BC(dev_priv)) 7100 gen6_update_ring_freq(dev_priv); 7101 } else if (IS_BROADWELL(dev_priv)) { 7102 gen8_enable_rps(dev_priv); 7103 gen6_update_ring_freq(dev_priv); 7104 } else if (INTEL_GEN(dev_priv) >= 6) { 7105 gen6_enable_rps(dev_priv); 7106 gen6_update_ring_freq(dev_priv); 7107 } else if (IS_IRONLAKE_M(dev_priv)) { 7108 ironlake_enable_drps(dev_priv); 7109 intel_init_emon(dev_priv); 7110 } 7111 7112 WARN_ON(dev_priv->rps.max_freq < dev_priv->rps.min_freq); 7113 WARN_ON(dev_priv->rps.idle_freq > dev_priv->rps.max_freq); 7114 7115 WARN_ON(dev_priv->rps.efficient_freq < dev_priv->rps.min_freq); 7116 WARN_ON(dev_priv->rps.efficient_freq > dev_priv->rps.max_freq); 7117 7118 dev_priv->rps.enabled = true; 7119 mutex_unlock(&dev_priv->rps.hw_lock); 7120 } 7121 7122 static void __intel_autoenable_gt_powersave(struct work_struct *work) 7123 { 7124 struct drm_i915_private *dev_priv = 7125 container_of(work, typeof(*dev_priv), rps.autoenable_work.work); 7126 struct intel_engine_cs *rcs; 7127 struct drm_i915_gem_request *req; 7128 7129 if (READ_ONCE(dev_priv->rps.enabled)) 7130 goto out; 7131 7132 rcs = dev_priv->engine[RCS]; 7133 if (rcs->last_retired_context) 7134 goto out; 7135 7136 if (!rcs->init_context) 7137 goto out; 7138 7139 mutex_lock(&dev_priv->drm.struct_mutex); 7140 7141 req = i915_gem_request_alloc(rcs, dev_priv->kernel_context); 7142 if (IS_ERR(req)) 7143 goto unlock; 7144 7145 if (!i915.enable_execlists && i915_switch_context(req) == 0) 7146 rcs->init_context(req); 7147 7148 /* Mark the device busy, calling intel_enable_gt_powersave() */ 7149 
i915_add_request(req); 7150 7151 unlock: 7152 mutex_unlock(&dev_priv->drm.struct_mutex); 7153 out: 7154 intel_runtime_pm_put(dev_priv); 7155 } 7156 7157 void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) 7158 { 7159 if (READ_ONCE(dev_priv->rps.enabled)) 7160 return; 7161 7162 if (IS_IRONLAKE_M(dev_priv)) { 7163 ironlake_enable_drps(dev_priv); 7164 intel_init_emon(dev_priv); 7165 } else if (INTEL_INFO(dev_priv)->gen >= 6) { 7166 /* 7167 * PCU communication is slow and this doesn't need to be 7168 * done at any specific time, so do this out of our fast path 7169 * to make resume and init faster. 7170 * 7171 * We depend on the HW RC6 power context save/restore 7172 * mechanism when entering D3 through runtime PM suspend. So 7173 * disable RPM until RPS/RC6 is properly setup. We can only 7174 * get here via the driver load/system resume/runtime resume 7175 * paths, so the _noresume version is enough (and in case of 7176 * runtime resume it's necessary). 7177 */ 7178 if (queue_delayed_work(dev_priv->wq, 7179 &dev_priv->rps.autoenable_work, 7180 round_jiffies_up_relative(HZ))) 7181 intel_runtime_pm_get_noresume(dev_priv); 7182 } 7183 } 7184 7185 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) 7186 { 7187 /* 7188 * On Ibex Peak and Cougar Point, we need to disable clock 7189 * gating for the panel power sequencer or it will fail to 7190 * start up when no ports are active. 7191 */ 7192 I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); 7193 } 7194 7195 static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) 7196 { 7197 enum i915_pipe pipe; 7198 7199 for_each_pipe(dev_priv, pipe) { 7200 I915_WRITE(DSPCNTR(pipe), 7201 I915_READ(DSPCNTR(pipe)) | 7202 DISPPLANE_TRICKLE_FEED_DISABLE); 7203 7204 I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); 7205 POSTING_READ(DSPSURF(pipe)); 7206 } 7207 } 7208 7209 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) 7210 { 7211 I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); 7212 I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); 7213 I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); 7214 7215 /* 7216 * Don't touch WM1S_LP_EN here. 7217 * Doing so could cause underruns. 7218 */ 7219 } 7220 7221 static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv) 7222 { 7223 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; 7224 7225 /* 7226 * Required for FBC 7227 * WaFbcDisableDpfcClockGating:ilk 7228 */ 7229 dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | 7230 ILK_DPFCUNIT_CLOCK_GATE_DISABLE | 7231 ILK_DPFDUNIT_CLOCK_GATE_ENABLE; 7232 7233 I915_WRITE(PCH_3DCGDIS0, 7234 MARIUNIT_CLOCK_GATE_DISABLE | 7235 SVSMUNIT_CLOCK_GATE_DISABLE); 7236 I915_WRITE(PCH_3DCGDIS1, 7237 VFMUNIT_CLOCK_GATE_DISABLE); 7238 7239 /* 7240 * According to the spec the following bits should be set in 7241 * order to enable memory self-refresh 7242 * The bit 22/21 of 0x42004 7243 * The bit 5 of 0x42020 7244 * The bit 15 of 0x45000 7245 */ 7246 I915_WRITE(ILK_DISPLAY_CHICKEN2, 7247 (I915_READ(ILK_DISPLAY_CHICKEN2) | 7248 ILK_DPARB_GATE | ILK_VSDPFD_FULL)); 7249 dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; 7250 I915_WRITE(DISP_ARB_CTL, 7251 (I915_READ(DISP_ARB_CTL) | 7252 DISP_FBC_WM_DIS)); 7253 7254 ilk_init_lp_watermarks(dev_priv); 7255 7256 /* 7257 * Based on the document from hardware guys the following bits 7258 * should be set unconditionally in order to enable FBC. 
	 * The bit 22 of 0x42000
	 * The bit 22 of 0x42004
	 * The bit 7,8,9 of 0x42020.
	 */
	if (IS_IRONLAKE_M(dev_priv)) {
		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
		I915_WRITE(ILK_DISPLAY_CHICKEN1,
			   I915_READ(ILK_DISPLAY_CHICKEN1) |
			   ILK_FBCQ_DIS);
		I915_WRITE(ILK_DISPLAY_CHICKEN2,
			   I915_READ(ILK_DISPLAY_CHICKEN2) |
			   ILK_DPARB_GATE);
	}

	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);

	I915_WRITE(ILK_DISPLAY_CHICKEN2,
		   I915_READ(ILK_DISPLAY_CHICKEN2) |
		   ILK_ELPIN_409_SELECT);
	I915_WRITE(_3D_CHICKEN2,
		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
		   _3D_CHICKEN2_WM_READ_PIPELINED);

	/* WaDisableRenderCachePipelinedFlush:ilk */
	I915_WRITE(CACHE_MODE_0,
		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));

	/* WaDisable_RenderCache_OperationalFlush:ilk */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	g4x_disable_trickle_feed(dev_priv);

	ibx_init_clock_gating(dev_priv);
}

static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
{
	int pipe;
	uint32_t val;

	/*
	 * On Ibex Peak and Cougar Point, we need to disable clock
	 * gating for the panel power sequencer or it will fail to
	 * start up when no ports are active.
	 */
	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
		   PCH_DPLUNIT_CLOCK_GATE_DISABLE |
		   PCH_CPUNIT_CLOCK_GATE_DISABLE);
	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
		   DPLS_EDP_PPS_FIX_DIS);
	/* The below fixes the weird display corruption, a few pixels shifted
	 * downward, on (only) LVDS of some HP laptops with IVY.
	 */
	for_each_pipe(dev_priv, pipe) {
		val = I915_READ(TRANS_CHICKEN2(pipe));
		val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
		val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
		if (dev_priv->vbt.fdi_rx_polarity_inverted)
			val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
		val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
		val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
		I915_WRITE(TRANS_CHICKEN2(pipe), val);
	}
	/* WADP0ClockGatingDisable */
	for_each_pipe(dev_priv, pipe) {
		I915_WRITE(TRANS_CHICKEN1(pipe),
			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
	}
}

static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
{
	uint32_t tmp;

	tmp = I915_READ(MCH_SSKPD);
	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
		DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
			      tmp);
}

static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
{
	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;

	I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);

	I915_WRITE(ILK_DISPLAY_CHICKEN2,
		   I915_READ(ILK_DISPLAY_CHICKEN2) |
		   ILK_ELPIN_409_SELECT);

	/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
	I915_WRITE(_3D_CHICKEN,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));

	/* WaDisable_RenderCache_OperationalFlush:snb */
	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
7360 * 7361 * Note that PS/WM thread counts depend on the WIZ hashing 7362 * disable bit, which we don't touch here, but it's good 7363 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 7364 */ 7365 I915_WRITE(GEN6_GT_MODE, 7366 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7367 7368 ilk_init_lp_watermarks(dev_priv); 7369 7370 I915_WRITE(CACHE_MODE_0, 7371 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); 7372 7373 I915_WRITE(GEN6_UCGCTL1, 7374 I915_READ(GEN6_UCGCTL1) | 7375 GEN6_BLBUNIT_CLOCK_GATE_DISABLE | 7376 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 7377 7378 /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock 7379 * gating disable must be set. Failure to set it results in 7380 * flickering pixels due to Z write ordering failures after 7381 * some amount of runtime in the Mesa "fire" demo, and Unigine 7382 * Sanctuary and Tropics, and apparently anything else with 7383 * alpha test or pixel discard. 7384 * 7385 * According to the spec, bit 11 (RCCUNIT) must also be set, 7386 * but we didn't debug actual testcases to find it out. 7387 * 7388 * WaDisableRCCUnitClockGating:snb 7389 * WaDisableRCPBUnitClockGating:snb 7390 */ 7391 I915_WRITE(GEN6_UCGCTL2, 7392 GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | 7393 GEN6_RCCUNIT_CLOCK_GATE_DISABLE); 7394 7395 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 7396 I915_WRITE(_3D_CHICKEN3, 7397 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); 7398 7399 /* 7400 * Bspec says: 7401 * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and 7402 * 3DSTATE_SF number of SF output attributes is more than 16." 7403 */ 7404 I915_WRITE(_3D_CHICKEN3, 7405 _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); 7406 7407 /* 7408 * According to the spec the following bits should be 7409 * set in order to enable memory self-refresh and fbc: 7410 * The bit21 and bit22 of 0x42000 7411 * The bit21 and bit22 of 0x42004 7412 * The bit5 and bit7 of 0x42020 7413 * The bit14 of 0x70180 7414 * The bit14 of 0x71180 7415 * 7416 * WaFbcAsynchFlipDisableFbcQueue:snb 7417 */ 7418 I915_WRITE(ILK_DISPLAY_CHICKEN1, 7419 I915_READ(ILK_DISPLAY_CHICKEN1) | 7420 ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); 7421 I915_WRITE(ILK_DISPLAY_CHICKEN2, 7422 I915_READ(ILK_DISPLAY_CHICKEN2) | 7423 ILK_DPARB_GATE | ILK_VSDPFD_FULL); 7424 I915_WRITE(ILK_DSPCLK_GATE_D, 7425 I915_READ(ILK_DSPCLK_GATE_D) | 7426 ILK_DPARBUNIT_CLOCK_GATE_ENABLE | 7427 ILK_DPFDUNIT_CLOCK_GATE_ENABLE); 7428 7429 g4x_disable_trickle_feed(dev_priv); 7430 7431 cpt_init_clock_gating(dev_priv); 7432 7433 gen6_check_mch_setup(dev_priv); 7434 } 7435 7436 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) 7437 { 7438 uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); 7439 7440 /* 7441 * WaVSThreadDispatchOverride:ivb,vlv 7442 * 7443 * This actually overrides the dispatch 7444 * mode for all thread types. 7445 */ 7446 reg &= ~GEN7_FF_SCHED_MASK; 7447 reg |= GEN7_FF_TS_SCHED_HW; 7448 reg |= GEN7_FF_VS_SCHED_HW; 7449 reg |= GEN7_FF_DS_SCHED_HW; 7450 7451 I915_WRITE(GEN7_FF_THREAD_MODE, reg); 7452 } 7453 7454 static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) 7455 { 7456 /* 7457 * TODO: this bit should only be enabled when really needed, then 7458 * disabled when not needed anymore in order to save power. 
7459 */ 7460 if (HAS_PCH_LPT_LP(dev_priv)) 7461 I915_WRITE(SOUTH_DSPCLK_GATE_D, 7462 I915_READ(SOUTH_DSPCLK_GATE_D) | 7463 PCH_LP_PARTITION_LEVEL_DISABLE); 7464 7465 /* WADPOClockGatingDisable:hsw */ 7466 I915_WRITE(TRANS_CHICKEN1(PIPE_A), 7467 I915_READ(TRANS_CHICKEN1(PIPE_A)) | 7468 TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); 7469 } 7470 7471 static void lpt_suspend_hw(struct drm_i915_private *dev_priv) 7472 { 7473 if (HAS_PCH_LPT_LP(dev_priv)) { 7474 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); 7475 7476 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; 7477 I915_WRITE(SOUTH_DSPCLK_GATE_D, val); 7478 } 7479 } 7480 7481 static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, 7482 int general_prio_credits, 7483 int high_prio_credits) 7484 { 7485 u32 misccpctl; 7486 7487 /* WaTempDisableDOPClkGating:bdw */ 7488 misccpctl = I915_READ(GEN7_MISCCPCTL); 7489 I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); 7490 7491 I915_WRITE(GEN8_L3SQCREG1, 7492 L3_GENERAL_PRIO_CREDITS(general_prio_credits) | 7493 L3_HIGH_PRIO_CREDITS(high_prio_credits)); 7494 7495 /* 7496 * Wait at least 100 clocks before re-enabling clock gating. 7497 * See the definition of L3SQCREG1 in BSpec. 7498 */ 7499 POSTING_READ(GEN8_L3SQCREG1); 7500 udelay(1); 7501 I915_WRITE(GEN7_MISCCPCTL, misccpctl); 7502 } 7503 7504 static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv) 7505 { 7506 gen9_init_clock_gating(dev_priv); 7507 7508 /* WaDisableSDEUnitClockGating:kbl */ 7509 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 7510 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7511 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7512 7513 /* WaDisableGamClockGating:kbl */ 7514 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) 7515 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 7516 GEN6_GAMUNIT_CLOCK_GATE_DISABLE); 7517 7518 /* WaFbcNukeOnHostModify:kbl */ 7519 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 7520 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 7521 } 7522 7523 static void skylake_init_clock_gating(struct drm_i915_private *dev_priv) 7524 { 7525 gen9_init_clock_gating(dev_priv); 7526 7527 /* WAC6entrylatency:skl */ 7528 I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | 7529 FBC_LLC_FULLY_OPEN); 7530 7531 /* WaFbcNukeOnHostModify:skl */ 7532 I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | 7533 ILK_DPFC_NUKE_ON_ANY_MODIFICATION); 7534 } 7535 7536 static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) 7537 { 7538 enum i915_pipe pipe; 7539 7540 ilk_init_lp_watermarks(dev_priv); 7541 7542 /* WaSwitchSolVfFArbitrationPriority:bdw */ 7543 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 7544 7545 /* WaPsrDPAMaskVBlankInSRD:bdw */ 7546 I915_WRITE(CHICKEN_PAR1_1, 7547 I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); 7548 7549 /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ 7550 for_each_pipe(dev_priv, pipe) { 7551 I915_WRITE(CHICKEN_PIPESL_1(pipe), 7552 I915_READ(CHICKEN_PIPESL_1(pipe)) | 7553 BDW_DPRS_MASK_VBLANK_SRD); 7554 } 7555 7556 /* WaVSRefCountFullforceMissDisable:bdw */ 7557 /* WaDSRefCountFullforceMissDisable:bdw */ 7558 I915_WRITE(GEN7_FF_THREAD_MODE, 7559 I915_READ(GEN7_FF_THREAD_MODE) & 7560 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 7561 7562 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 7563 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 7564 7565 /* WaDisableSDEUnitClockGating:bdw */ 7566 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7567 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7568 7569 /* WaProgramL3SqcReg1Default:bdw */ 7570 
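	/*
	 * The helper defined above briefly disables DOP clock gating via
	 * GEN7_MISCCPCTL, programs GEN8_L3SQCREG1 with the requested
	 * general/high priority credit counts (30 and 2 here), waits the
	 * ~100 clocks the spec asks for and then restores MISCCPCTL.
	 */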
gen8_set_l3sqc_credits(dev_priv, 30, 2); 7571 7572 /* 7573 * WaGttCachingOffByDefault:bdw 7574 * GTT cache may not work with big pages, so if those 7575 * are ever enabled GTT cache may need to be disabled. 7576 */ 7577 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 7578 7579 /* WaKVMNotificationOnConfigChange:bdw */ 7580 I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) 7581 | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); 7582 7583 lpt_init_clock_gating(dev_priv); 7584 7585 /* WaDisableDopClockGating:bdw 7586 * 7587 * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP 7588 * clock gating. 7589 */ 7590 I915_WRITE(GEN6_UCGCTL1, 7591 I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); 7592 } 7593 7594 static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) 7595 { 7596 ilk_init_lp_watermarks(dev_priv); 7597 7598 /* L3 caching of data atomics doesn't work -- disable it. */ 7599 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); 7600 I915_WRITE(HSW_ROW_CHICKEN3, 7601 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); 7602 7603 /* This is required by WaCatErrorRejectionIssue:hsw */ 7604 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7605 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7606 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7607 7608 /* WaVSRefCountFullforceMissDisable:hsw */ 7609 I915_WRITE(GEN7_FF_THREAD_MODE, 7610 I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); 7611 7612 /* WaDisable_RenderCache_OperationalFlush:hsw */ 7613 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7614 7615 /* enable HiZ Raw Stall Optimization */ 7616 I915_WRITE(CACHE_MODE_0_GEN7, 7617 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 7618 7619 /* WaDisable4x2SubspanOptimization:hsw */ 7620 I915_WRITE(CACHE_MODE_1, 7621 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7622 7623 /* 7624 * BSpec recommends 8x4 when MSAA is used, 7625 * however in practice 16x4 seems fastest. 7626 * 7627 * Note that PS/WM thread counts depend on the WIZ hashing 7628 * disable bit, which we don't touch here, but it's good 7629 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
7630 */ 7631 I915_WRITE(GEN7_GT_MODE, 7632 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); 7633 7634 /* WaSampleCChickenBitEnable:hsw */ 7635 I915_WRITE(HALF_SLICE_CHICKEN3, 7636 _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE)); 7637 7638 /* WaSwitchSolVfFArbitrationPriority:hsw */ 7639 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); 7640 7641 /* WaRsPkgCStateDisplayPMReq:hsw */ 7642 I915_WRITE(CHICKEN_PAR1_1, 7643 I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); 7644 7645 lpt_init_clock_gating(dev_priv); 7646 } 7647 7648 static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv) 7649 { 7650 uint32_t snpcr; 7651 7652 ilk_init_lp_watermarks(dev_priv); 7653 7654 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); 7655 7656 /* WaDisableEarlyCull:ivb */ 7657 I915_WRITE(_3D_CHICKEN3, 7658 _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); 7659 7660 /* WaDisableBackToBackFlipFix:ivb */ 7661 I915_WRITE(IVB_CHICKEN3, 7662 CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | 7663 CHICKEN3_DGMG_DONE_FIX_DISABLE); 7664 7665 /* WaDisablePSDDualDispatchEnable:ivb */ 7666 if (IS_IVB_GT1(dev_priv)) 7667 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, 7668 _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); 7669 7670 /* WaDisable_RenderCache_OperationalFlush:ivb */ 7671 I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7672 7673 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ 7674 I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, 7675 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); 7676 7677 /* WaApplyL3ControlAndL3ChickenMode:ivb */ 7678 I915_WRITE(GEN7_L3CNTLREG1, 7679 GEN7_WA_FOR_GEN7_L3_CONTROL); 7680 I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, 7681 GEN7_WA_L3_CHICKEN_MODE); 7682 if (IS_IVB_GT1(dev_priv)) 7683 I915_WRITE(GEN7_ROW_CHICKEN2, 7684 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7685 else { 7686 /* must write both registers */ 7687 I915_WRITE(GEN7_ROW_CHICKEN2, 7688 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7689 I915_WRITE(GEN7_ROW_CHICKEN2_GT2, 7690 _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 7691 } 7692 7693 /* WaForceL3Serialization:ivb */ 7694 I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & 7695 ~L3SQ_URB_READ_CAM_MATCH_DISABLE); 7696 7697 /* 7698 * According to the spec, bit 13 (RCZUNIT) must be set on IVB. 7699 * This implements the WaDisableRCZUnitClockGating:ivb workaround. 7700 */ 7701 I915_WRITE(GEN6_UCGCTL2, 7702 GEN6_RCZUNIT_CLOCK_GATE_DISABLE); 7703 7704 /* This is required by WaCatErrorRejectionIssue:ivb */ 7705 I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, 7706 I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | 7707 GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); 7708 7709 g4x_disable_trickle_feed(dev_priv); 7710 7711 gen7_setup_fixed_func_scheduler(dev_priv); 7712 7713 if (0) { /* causes HiZ corruption on ivb:gt1 */ 7714 /* enable HiZ Raw Stall Optimization */ 7715 I915_WRITE(CACHE_MODE_0_GEN7, 7716 _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); 7717 } 7718 7719 /* WaDisable4x2SubspanOptimization:ivb */ 7720 I915_WRITE(CACHE_MODE_1, 7721 _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); 7722 7723 /* 7724 * BSpec recommends 8x4 when MSAA is used, 7725 * however in practice 16x4 seems fastest. 7726 * 7727 * Note that PS/WM thread counts depend on the WIZ hashing 7728 * disable bit, which we don't touch here, but it's good 7729 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
	snpcr &= ~GEN6_MBC_SNPCR_MASK;
	snpcr |= GEN6_MBC_SNPCR_MED;
	I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);

	if (!HAS_PCH_NOP(dev_priv))
		cpt_init_clock_gating(dev_priv);

	gen6_check_mch_setup(dev_priv);
}

static void valleyview_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/* WaDisableEarlyCull:vlv */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));

	/* WaDisableBackToBackFlipFix:vlv */
	I915_WRITE(IVB_CHICKEN3,
		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
		   CHICKEN3_DGMG_DONE_FIX_DISABLE);

	/* WaPsdDispatchEnable:vlv */
	/* WaDisablePSDDualDispatchEnable:vlv */
	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));

	/* WaDisable_RenderCache_OperationalFlush:vlv */
	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));

	/* WaForceL3Serialization:vlv */
	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/* WaDisableDopClockGating:vlv */
	I915_WRITE(GEN7_ROW_CHICKEN2,
		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));

	/* This is required by WaCatErrorRejectionIssue:vlv */
	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);

	gen7_setup_fixed_func_scheduler(dev_priv);

	/*
	 * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
	 * This implements the WaDisableRCZUnitClockGating:vlv workaround.
	 */
	I915_WRITE(GEN6_UCGCTL2,
		   GEN6_RCZUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableL3Bank2xClockGate:vlv
	 * Disabling L3 clock gating - MMIO 940c[25] = 1
	 * Set bit 25 to disable L3_BANK_2x_CLK_GATING */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);

	/*
	 * BSpec says this must be set, even though
	 * WaDisable4x2SubspanOptimization isn't listed for VLV.
	 */
	I915_WRITE(CACHE_MODE_1,
		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	I915_WRITE(GEN7_GT_MODE,
		   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);

	/*
	 * WaDisableVLVClockGating_VBIIssue:vlv
	 * Disable clock gating on the GCFG unit to prevent a delay
	 * in the reporting of vblank events.
7821 */ 7822 I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); 7823 } 7824 7825 static void cherryview_init_clock_gating(struct drm_i915_private *dev_priv) 7826 { 7827 /* WaVSRefCountFullforceMissDisable:chv */ 7828 /* WaDSRefCountFullforceMissDisable:chv */ 7829 I915_WRITE(GEN7_FF_THREAD_MODE, 7830 I915_READ(GEN7_FF_THREAD_MODE) & 7831 ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); 7832 7833 /* WaDisableSemaphoreAndSyncFlipWait:chv */ 7834 I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, 7835 _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); 7836 7837 /* WaDisableCSUnitClockGating:chv */ 7838 I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | 7839 GEN6_CSUNIT_CLOCK_GATE_DISABLE); 7840 7841 /* WaDisableSDEUnitClockGating:chv */ 7842 I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | 7843 GEN8_SDEUNIT_CLOCK_GATE_DISABLE); 7844 7845 /* 7846 * WaProgramL3SqcReg1Default:chv 7847 * See gfxspecs/Related Documents/Performance Guide/ 7848 * LSQC Setting Recommendations. 7849 */ 7850 gen8_set_l3sqc_credits(dev_priv, 38, 2); 7851 7852 /* 7853 * GTT cache may not work with big pages, so if those 7854 * are ever enabled GTT cache may need to be disabled. 7855 */ 7856 I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); 7857 } 7858 7859 static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) 7860 { 7861 uint32_t dspclk_gate; 7862 7863 I915_WRITE(RENCLK_GATE_D1, 0); 7864 I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | 7865 GS_UNIT_CLOCK_GATE_DISABLE | 7866 CL_UNIT_CLOCK_GATE_DISABLE); 7867 I915_WRITE(RAMCLK_GATE_D, 0); 7868 dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | 7869 OVRUNIT_CLOCK_GATE_DISABLE | 7870 OVCUNIT_CLOCK_GATE_DISABLE; 7871 if (IS_GM45(dev_priv)) 7872 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; 7873 I915_WRITE(DSPCLK_GATE_D, dspclk_gate); 7874 7875 /* WaDisableRenderCachePipelinedFlush */ 7876 I915_WRITE(CACHE_MODE_0, 7877 _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); 7878 7879 /* WaDisable_RenderCache_OperationalFlush:g4x */ 7880 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7881 7882 g4x_disable_trickle_feed(dev_priv); 7883 } 7884 7885 static void crestline_init_clock_gating(struct drm_i915_private *dev_priv) 7886 { 7887 I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); 7888 I915_WRITE(RENCLK_GATE_D2, 0); 7889 I915_WRITE(DSPCLK_GATE_D, 0); 7890 I915_WRITE(RAMCLK_GATE_D, 0); 7891 I915_WRITE16(DEUC, 0); 7892 I915_WRITE(MI_ARB_STATE, 7893 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7894 7895 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7896 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7897 } 7898 7899 static void broadwater_init_clock_gating(struct drm_i915_private *dev_priv) 7900 { 7901 I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | 7902 I965_RCC_CLOCK_GATE_DISABLE | 7903 I965_RCPB_CLOCK_GATE_DISABLE | 7904 I965_ISC_CLOCK_GATE_DISABLE | 7905 I965_FBC_CLOCK_GATE_DISABLE); 7906 I915_WRITE(RENCLK_GATE_D2, 0); 7907 I915_WRITE(MI_ARB_STATE, 7908 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7909 7910 /* WaDisable_RenderCache_OperationalFlush:gen4 */ 7911 I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); 7912 } 7913 7914 static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) 7915 { 7916 u32 dstate = I915_READ(D_STATE); 7917 7918 dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | 7919 DSTATE_DOT_CLOCK_GATING; 7920 I915_WRITE(D_STATE, dstate); 7921 7922 if (IS_PINEVIEW(dev_priv)) 7923 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); 7924 
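	/*
	 * A note on the masked-register idiom used throughout this file:
	 * registers written with _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE()
	 * carry a write-enable mask in their upper 16 bits. Assuming the
	 * usual i915 definition (roughly "(bit) << 16 | (bit)" for enable
	 * and "(bit) << 16" for disable), such writes update only the named
	 * bit and need no read-modify-write of the register.
	 */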
7925 /* IIR "flip pending" means done if this bit is set */ 7926 I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); 7927 7928 /* interrupts should cause a wake up from C3 */ 7929 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); 7930 7931 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 7932 I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); 7933 7934 I915_WRITE(MI_ARB_STATE, 7935 _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); 7936 } 7937 7938 static void i85x_init_clock_gating(struct drm_i915_private *dev_priv) 7939 { 7940 I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); 7941 7942 /* interrupts should cause a wake up from C3 */ 7943 I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | 7944 _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); 7945 7946 I915_WRITE(MEM_MODE, 7947 _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); 7948 } 7949 7950 static void i830_init_clock_gating(struct drm_i915_private *dev_priv) 7951 { 7952 I915_WRITE(MEM_MODE, 7953 _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | 7954 _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); 7955 } 7956 7957 void intel_init_clock_gating(struct drm_i915_private *dev_priv) 7958 { 7959 dev_priv->display.init_clock_gating(dev_priv); 7960 } 7961 7962 void intel_suspend_hw(struct drm_i915_private *dev_priv) 7963 { 7964 if (HAS_PCH_LPT(dev_priv)) 7965 lpt_suspend_hw(dev_priv); 7966 } 7967 7968 static void nop_init_clock_gating(struct drm_i915_private *dev_priv) 7969 { 7970 DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); 7971 } 7972 7973 /** 7974 * intel_init_clock_gating_hooks - setup the clock gating hooks 7975 * @dev_priv: device private 7976 * 7977 * Setup the hooks that configure which clocks of a given platform can be 7978 * gated and also apply various GT and display specific workarounds for these 7979 * platforms. Note that some GT specific workarounds are applied separately 7980 * when GPU contexts or batchbuffers start their execution. 
7981 */ 7982 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) 7983 { 7984 if (IS_SKYLAKE(dev_priv)) 7985 dev_priv->display.init_clock_gating = skylake_init_clock_gating; 7986 else if (IS_KABYLAKE(dev_priv)) 7987 dev_priv->display.init_clock_gating = kabylake_init_clock_gating; 7988 else if (IS_BROXTON(dev_priv)) 7989 dev_priv->display.init_clock_gating = bxt_init_clock_gating; 7990 else if (IS_GEMINILAKE(dev_priv)) 7991 dev_priv->display.init_clock_gating = glk_init_clock_gating; 7992 else if (IS_BROADWELL(dev_priv)) 7993 dev_priv->display.init_clock_gating = broadwell_init_clock_gating; 7994 else if (IS_CHERRYVIEW(dev_priv)) 7995 dev_priv->display.init_clock_gating = cherryview_init_clock_gating; 7996 else if (IS_HASWELL(dev_priv)) 7997 dev_priv->display.init_clock_gating = haswell_init_clock_gating; 7998 else if (IS_IVYBRIDGE(dev_priv)) 7999 dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; 8000 else if (IS_VALLEYVIEW(dev_priv)) 8001 dev_priv->display.init_clock_gating = valleyview_init_clock_gating; 8002 else if (IS_GEN6(dev_priv)) 8003 dev_priv->display.init_clock_gating = gen6_init_clock_gating; 8004 else if (IS_GEN5(dev_priv)) 8005 dev_priv->display.init_clock_gating = ironlake_init_clock_gating; 8006 else if (IS_G4X(dev_priv)) 8007 dev_priv->display.init_clock_gating = g4x_init_clock_gating; 8008 else if (IS_I965GM(dev_priv)) 8009 dev_priv->display.init_clock_gating = crestline_init_clock_gating; 8010 else if (IS_I965G(dev_priv)) 8011 dev_priv->display.init_clock_gating = broadwater_init_clock_gating; 8012 else if (IS_GEN3(dev_priv)) 8013 dev_priv->display.init_clock_gating = gen3_init_clock_gating; 8014 else if (IS_I85X(dev_priv) || IS_I865G(dev_priv)) 8015 dev_priv->display.init_clock_gating = i85x_init_clock_gating; 8016 else if (IS_GEN2(dev_priv)) 8017 dev_priv->display.init_clock_gating = i830_init_clock_gating; 8018 else { 8019 MISSING_CASE(INTEL_DEVID(dev_priv)); 8020 dev_priv->display.init_clock_gating = nop_init_clock_gating; 8021 } 8022 } 8023 8024 /* Set up chip specific power management-related functions */ 8025 void intel_init_pm(struct drm_i915_private *dev_priv) 8026 { 8027 intel_fbc_init(dev_priv); 8028 8029 /* For cxsr */ 8030 if (IS_PINEVIEW(dev_priv)) 8031 i915_pineview_get_mem_freq(dev_priv); 8032 else if (IS_GEN5(dev_priv)) 8033 i915_ironlake_get_mem_freq(dev_priv); 8034 8035 /* For FIFO watermark updates */ 8036 if (INTEL_GEN(dev_priv) >= 9) { 8037 skl_setup_wm_latency(dev_priv); 8038 dev_priv->display.initial_watermarks = skl_initial_wm; 8039 dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm; 8040 dev_priv->display.compute_global_watermarks = skl_compute_wm; 8041 } else if (HAS_PCH_SPLIT(dev_priv)) { 8042 ilk_setup_wm_latency(dev_priv); 8043 8044 if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] && 8045 dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || 8046 (!IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[0] && 8047 dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) { 8048 dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm; 8049 dev_priv->display.compute_intermediate_wm = 8050 ilk_compute_intermediate_wm; 8051 dev_priv->display.initial_watermarks = 8052 ilk_initial_watermarks; 8053 dev_priv->display.optimize_watermarks = 8054 ilk_optimize_watermarks; 8055 } else { 8056 DRM_DEBUG_KMS("Failed to read display plane latency. 
" 8057 "Disable CxSR\n"); 8058 } 8059 } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 8060 vlv_setup_wm_latency(dev_priv); 8061 dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; 8062 dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; 8063 dev_priv->display.initial_watermarks = vlv_initial_watermarks; 8064 dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; 8065 dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; 8066 } else if (IS_PINEVIEW(dev_priv)) { 8067 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), 8068 dev_priv->is_ddr3, 8069 dev_priv->fsb_freq, 8070 dev_priv->mem_freq)) { 8071 DRM_INFO("failed to find known CxSR latency " 8072 "(found ddr%s fsb freq %d, mem freq %d), " 8073 "disabling CxSR\n", 8074 (dev_priv->is_ddr3 == 1) ? "3" : "2", 8075 dev_priv->fsb_freq, dev_priv->mem_freq); 8076 /* Disable CxSR and never update its watermark again */ 8077 intel_set_memory_cxsr(dev_priv, false); 8078 dev_priv->display.update_wm = NULL; 8079 } else 8080 dev_priv->display.update_wm = pineview_update_wm; 8081 } else if (IS_G4X(dev_priv)) { 8082 dev_priv->display.update_wm = g4x_update_wm; 8083 } else if (IS_GEN4(dev_priv)) { 8084 dev_priv->display.update_wm = i965_update_wm; 8085 } else if (IS_GEN3(dev_priv)) { 8086 dev_priv->display.update_wm = i9xx_update_wm; 8087 dev_priv->display.get_fifo_size = i9xx_get_fifo_size; 8088 } else if (IS_GEN2(dev_priv)) { 8089 if (INTEL_INFO(dev_priv)->num_pipes == 1) { 8090 dev_priv->display.update_wm = i845_update_wm; 8091 dev_priv->display.get_fifo_size = i845_get_fifo_size; 8092 } else { 8093 dev_priv->display.update_wm = i9xx_update_wm; 8094 dev_priv->display.get_fifo_size = i830_get_fifo_size; 8095 } 8096 } else { 8097 DRM_ERROR("unexpected fall-through in intel_init_pm\n"); 8098 } 8099 } 8100 8101 static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) 8102 { 8103 uint32_t flags = 8104 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 8105 8106 switch (flags) { 8107 case GEN6_PCODE_SUCCESS: 8108 return 0; 8109 case GEN6_PCODE_UNIMPLEMENTED_CMD: 8110 case GEN6_PCODE_ILLEGAL_CMD: 8111 return -ENXIO; 8112 case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 8113 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 8114 return -EOVERFLOW; 8115 case GEN6_PCODE_TIMEOUT: 8116 return -ETIMEDOUT; 8117 default: 8118 MISSING_CASE(flags); 8119 return 0; 8120 } 8121 } 8122 8123 static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) 8124 { 8125 uint32_t flags = 8126 I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; 8127 8128 switch (flags) { 8129 case GEN6_PCODE_SUCCESS: 8130 return 0; 8131 case GEN6_PCODE_ILLEGAL_CMD: 8132 return -ENXIO; 8133 case GEN7_PCODE_TIMEOUT: 8134 return -ETIMEDOUT; 8135 case GEN7_PCODE_ILLEGAL_DATA: 8136 return -EINVAL; 8137 case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: 8138 return -EOVERFLOW; 8139 default: 8140 MISSING_CASE(flags); 8141 return 0; 8142 } 8143 } 8144 8145 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) 8146 { 8147 int status; 8148 8149 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 8150 8151 /* GEN6_PCODE_* are outside of the forcewake domain, we can 8152 * use te fw I915_READ variants to reduce the amount of work 8153 * required when reading/writing. 
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (intel_wait_for_register_fw(dev_priv,
				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
				       500)) {
		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	*val = I915_READ_FW(GEN6_PCODE_DATA);
	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
				 status);
		return status;
	}

	return 0;
}

int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
			    u32 mbox, u32 val)
{
	int status;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	/* GEN6_PCODE_* are outside of the forcewake domain, we can
	 * use the fw I915_READ variants to reduce the amount of work
	 * required when reading/writing.
	 */

	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, val);
	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	if (intel_wait_for_register_fw(dev_priv,
				       GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
				       500)) {
		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	I915_WRITE_FW(GEN6_PCODE_DATA, 0);

	if (INTEL_GEN(dev_priv) > 6)
		status = gen7_check_mailbox_status(dev_priv);
	else
		status = gen6_check_mailbox_status(dev_priv);

	if (status) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
				 status);
		return status;
	}

	return 0;
}

static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
				  u32 request, u32 reply_mask, u32 reply,
				  u32 *status)
{
	u32 val = request;

	*status = sandybridge_pcode_read(dev_priv, mbox, &val);

	return *status || ((val & reply_mask) == reply);
}

/**
 * skl_pcode_request - send PCODE request until acknowledgment
 * @dev_priv: device private
 * @mbox: PCODE mailbox ID the request is targeted for
 * @request: request ID
 * @reply_mask: mask used to check for request acknowledgment
 * @reply: value used to check for request acknowledgment
 * @timeout_base_ms: timeout for polling with preemption enabled
 *
 * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
 * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
 * The request is acknowledged once the PCODE reply dword equals @reply after
 * applying @reply_mask. Polling is first attempted with preemption enabled
 * for @timeout_base_ms and, if this times out, for another 50 ms with
 * preemption disabled.
 *
 * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
 * other error as reported by PCODE.
8262 */ 8263 int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, 8264 u32 reply_mask, u32 reply, int timeout_base_ms) 8265 { 8266 u32 status; 8267 int ret; 8268 8269 WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); 8270 8271 #define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \ 8272 &status) 8273 8274 /* 8275 * Prime the PCODE by doing a request first. Normally it guarantees 8276 * that a subsequent request, at most @timeout_base_ms later, succeeds. 8277 * _wait_for() doesn't guarantee when its passed condition is evaluated 8278 * first, so send the first request explicitly. 8279 */ 8280 if (COND) { 8281 ret = 0; 8282 goto out; 8283 } 8284 ret = _wait_for(COND, timeout_base_ms * 1000, 10); 8285 if (!ret) 8286 goto out; 8287 8288 /* 8289 * The above can time out if the number of requests was low (2 in the 8290 * worst case) _and_ PCODE was busy for some reason even after a 8291 * (queued) request and @timeout_base_ms delay. As a workaround retry 8292 * the poll with preemption disabled to maximize the number of 8293 * requests. Increase the timeout from @timeout_base_ms to 50ms to 8294 * account for interrupts that could reduce the number of these 8295 * requests, and for any quirks of the PCODE firmware that delays 8296 * the request completion. 8297 */ 8298 DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); 8299 WARN_ON_ONCE(timeout_base_ms > 3); 8300 preempt_disable(); 8301 ret = wait_for_atomic(COND, 50); 8302 preempt_enable(); 8303 8304 out: 8305 return ret ? ret : status; 8306 #undef COND 8307 } 8308 8309 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) 8310 { 8311 /* 8312 * N = val - 0xb7 8313 * Slow = Fast = GPLL ref * N 8314 */ 8315 return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * (val - 0xb7), 1000); 8316 } 8317 8318 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) 8319 { 8320 return DIV_ROUND_CLOSEST(1000 * val, dev_priv->rps.gpll_ref_freq) + 0xb7; 8321 } 8322 8323 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) 8324 { 8325 /* 8326 * N = val / 2 8327 * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2 8328 */ 8329 return DIV_ROUND_CLOSEST(dev_priv->rps.gpll_ref_freq * val, 2 * 2 * 1000); 8330 } 8331 8332 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) 8333 { 8334 /* CHV needs even values */ 8335 return DIV_ROUND_CLOSEST(2 * 1000 * val, dev_priv->rps.gpll_ref_freq) * 2; 8336 } 8337 8338 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) 8339 { 8340 if (IS_GEN9(dev_priv)) 8341 return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, 8342 GEN9_FREQ_SCALER); 8343 else if (IS_CHERRYVIEW(dev_priv)) 8344 return chv_gpu_freq(dev_priv, val); 8345 else if (IS_VALLEYVIEW(dev_priv)) 8346 return byt_gpu_freq(dev_priv, val); 8347 else 8348 return val * GT_FREQUENCY_MULTIPLIER; 8349 } 8350 8351 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) 8352 { 8353 if (IS_GEN9(dev_priv)) 8354 return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, 8355 GT_FREQUENCY_MULTIPLIER); 8356 else if (IS_CHERRYVIEW(dev_priv)) 8357 return chv_freq_opcode(dev_priv, val); 8358 else if (IS_VALLEYVIEW(dev_priv)) 8359 return byt_freq_opcode(dev_priv, val); 8360 else 8361 return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER); 8362 } 8363 8364 struct request_boost { 8365 struct work_struct work; 8366 struct drm_i915_gem_request *req; 8367 }; 8368 8369 static void __intel_rps_boost_work(struct work_struct *work) 8370 { 8371 struct 
request_boost *boost = container_of(work, struct request_boost, work); 8372 struct drm_i915_gem_request *req = boost->req; 8373 8374 if (!i915_gem_request_completed(req)) 8375 gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); 8376 8377 i915_gem_request_put(req); 8378 kfree(boost); 8379 } 8380 8381 void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) 8382 { 8383 struct request_boost *boost; 8384 8385 if (req == NULL || INTEL_GEN(req->i915) < 6) 8386 return; 8387 8388 if (i915_gem_request_completed(req)) 8389 return; 8390 8391 boost = kmalloc(sizeof(*boost), M_DRM, GFP_ATOMIC); 8392 if (boost == NULL) 8393 return; 8394 8395 boost->req = i915_gem_request_get(req); 8396 8397 INIT_WORK(&boost->work, __intel_rps_boost_work); 8398 queue_work(req->i915->wq, &boost->work); 8399 } 8400 8401 void intel_pm_setup(struct drm_i915_private *dev_priv) 8402 { 8403 lockinit(&dev_priv->rps.hw_lock, "i915 rps.hw_lock", 0, LK_CANRECURSE); 8404 lockinit(&dev_priv->rps.client_lock, "i915rcl", 0, 0); 8405 8406 INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, 8407 __intel_autoenable_gt_powersave); 8408 INIT_LIST_HEAD(&dev_priv->rps.clients); 8409 8410 dev_priv->pm.suspended = false; 8411 atomic_set(&dev_priv->pm.wakeref_count, 0); 8412 } 8413 8414 static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, 8415 const i915_reg_t reg) 8416 { 8417 u32 lower, upper, tmp; 8418 int loop = 2; 8419 8420 /* The register accessed do not need forcewake. We borrow 8421 * uncore lock to prevent concurrent access to range reg. 8422 */ 8423 spin_lock_irq(&dev_priv->uncore.lock); 8424 8425 /* vlv and chv residency counters are 40 bits in width. 8426 * With a control bit, we can choose between upper or lower 8427 * 32bit window into this counter. 8428 * 8429 * Although we always use the counter in high-range mode elsewhere, 8430 * userspace may attempt to read the value before rc6 is initialised, 8431 * before we have set the default VLV_COUNTER_CONTROL value. So always 8432 * set the high bit to be safe. 8433 */ 8434 I915_WRITE_FW(VLV_COUNTER_CONTROL, 8435 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); 8436 upper = I915_READ_FW(reg); 8437 do { 8438 tmp = upper; 8439 8440 I915_WRITE_FW(VLV_COUNTER_CONTROL, 8441 _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); 8442 lower = I915_READ_FW(reg); 8443 8444 I915_WRITE_FW(VLV_COUNTER_CONTROL, 8445 _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); 8446 upper = I915_READ_FW(reg); 8447 } while (upper != tmp && --loop); 8448 8449 /* Everywhere else we always use VLV_COUNTER_CONTROL with the 8450 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set 8451 * now. 8452 */ 8453 8454 spin_unlock_irq(&dev_priv->uncore.lock); 8455 8456 return lower | (u64)upper << 8; 8457 } 8458 8459 u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, 8460 const i915_reg_t reg) 8461 { 8462 u64 time_hw, units, div; 8463 8464 if (!intel_enable_rc6()) 8465 return 0; 8466 8467 intel_runtime_pm_get(dev_priv); 8468 8469 /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ 8470 if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { 8471 units = 1000; 8472 div = dev_priv->czclk_freq; 8473 8474 time_hw = vlv_residency_raw(dev_priv, reg); 8475 } else if (IS_GEN9_LP(dev_priv)) { 8476 units = 1000; 8477 div = 1200; /* 833.33ns */ 8478 8479 time_hw = I915_READ(reg); 8480 } else { 8481 units = 128000; /* 1.28us */ 8482 div = 100000; 8483 8484 time_hw = I915_READ(reg); 8485 } 8486 8487 intel_runtime_pm_put(dev_priv); 8488 return DIV_ROUND_UP_ULL(time_hw * units, div); 8489 } 8490
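
/*
 * Worked example for the frequency conversion helpers above (illustrative
 * only; the reference clock used below is an assumed value, not one taken
 * from this file). On Valleyview, byt_gpu_freq() computes
 *
 *   N   = val - 0xb7
 *   MHz = DIV_ROUND_CLOSEST(rps.gpll_ref_freq * N, 1000)
 *
 * so with an assumed gpll_ref_freq of 25000 kHz, the opcode 0xc1 (N = 10)
 * maps to 250 MHz, and byt_freq_opcode(dev_priv, 250) maps back to 0xc1.
 * On gen9, intel_gpu_freq() scales by GT_FREQUENCY_MULTIPLIER /
 * GEN9_FREQ_SCALER (50/3 in mainline i915), so an RPS value of 18
 * corresponds to roughly 300 MHz.
 */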